Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 6048c986

History | View | Annotate | Download (168.4 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import sha
29
import time
30
import tempfile
31
import re
32
import platform
33

    
34
from ganeti import rpc
35
from ganeti import ssh
36
from ganeti import logger
37
from ganeti import utils
38
from ganeti import errors
39
from ganeti import hypervisor
40
from ganeti import locking
41
from ganeti import config
42
from ganeti import constants
43
from ganeti import objects
44
from ganeti import opcodes
45
from ganeti import ssconf
46
from ganeti import serializer
47

    
48

    
49
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement CheckPrereq which also fills in the opcode instance
      with all the fields (even if as None)
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_MASTER: the LU needs to run on the master node
        REQ_WSSTORE: the LU needs a writable SimpleStore
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  """
  # hooks path; None means the LU runs no hooks at all
  HPATH = None
  # hooks type (cluster/node/instance); only meaningful when HPATH is set
  HTYPE = None
  # names of opcode attributes that must be present (non-None) on the op
  _OP_REQP = []
  REQ_MASTER = True
  REQ_WSSTORE = False
  REQ_BGL = True

  def __init__(self, processor, op, context, sstore):
    """Constructor for LogicalUnit.

    This needs to be overriden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.sstore = sstore
    self.context = context
    # private cache for the lazily-built SshRunner (see __GetSSH below)
    self.__ssh = None

    # validate that every required opcode parameter was supplied
    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    # refuse to run any LU against an uninitialized cluster
    if not self.cfg.IsCluster():
      raise errors.OpPrereqError("Cluster not initialized yet,"
                                 " use 'gnt-cluster init' first.")
    if self.REQ_MASTER:
      # LUs that require the master role may only run on the master node
      master = sstore.GetMasterNode()
      if master != utils.HostInfo().name:
        raise errors.OpPrereqError("Commands must be run on the master"
                                   " node %s" % master)

  def __GetSSH(self):
    """Returns the SshRunner object, creating it on first access.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.sstore)
    return self.__ssh

  # expose the lazily-created SshRunner as a read-only attribute
  ssh = property(fget=__GetSSH)

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form; e.g. a short node name must be fully
    expanded after this method has successfully completed (so that
    hooks, logging, etc. work correctly).

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-node tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    Args:
      phase: the hooks phase that has just been run
      hooks_results: the results of the multi-node hooks rpc call
      feedback_fn: function to send feedback back to the caller
      lu_result: the previous result this LU had, or None in the PRE phase.

    """
    return lu_result
class NoHooksLU(LogicalUnit):
  """Base class for logical units that run no hooks.

  Deriving from this class instead of LogicalUnit saves each hook-less
  LU from repeating the HPATH/HTYPE boilerplate, reducing duplicate
  code.

  """
  HPATH = None
  HTYPE = None
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  Args:
    nodes: list of node names (strings); an empty list selects every
      node in the cluster

  Raises errors.OpPrereqError if the argument is not a list or if any
  name cannot be expanded to a known node.

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  # an empty list means "all nodes"
  if not nodes:
    return utils.NiceSort(lu.cfg.GetNodeList())

  wanted = []
  for name in nodes:
    expanded_name = lu.cfg.ExpandNodeName(name)
    if expanded_name is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(expanded_name)

  return utils.NiceSort(wanted)
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  Args:
    instances: list of instance names (strings); an empty list selects
      every instance in the cluster

  Raises errors.OpPrereqError if the argument is not a list or if any
  name cannot be expanded to a known instance.

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  # an empty list means "all instances"
  if not instances:
    return utils.NiceSort(lu.cfg.GetInstanceList())

  wanted = []
  for name in instances:
    expanded_name = lu.cfg.ExpandInstanceName(name)
    if expanded_name is None:
      raise errors.OpPrereqError("No such instance name '%s'" % name)
    wanted.append(expanded_name)

  return utils.NiceSort(wanted)
def _CheckOutputFields(static, dynamic, selected):
242
  """Checks whether all selected fields are valid.
243

244
  Args:
245
    static: Static fields
246
    dynamic: Dynamic fields
247

248
  """
249
  static_fields = frozenset(static)
250
  dynamic_fields = frozenset(dynamic)
251

    
252
  all_fields = static_fields | dynamic_fields
253

    
254
  if not all_fields.issuperset(selected):
255
    raise errors.OpPrereqError("Unknown output fields selected: %s"
256
                               % ",".join(frozenset(selected).
257
                                          difference(all_fields)))
258

    
259

    
260
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
261
                          memory, vcpus, nics):
262
  """Builds instance related env variables for hooks from single variables.
263

264
  Args:
265
    secondary_nodes: List of secondary nodes as strings
266
  """
267
  env = {
268
    "OP_TARGET": name,
269
    "INSTANCE_NAME": name,
270
    "INSTANCE_PRIMARY": primary_node,
271
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
272
    "INSTANCE_OS_TYPE": os_type,
273
    "INSTANCE_STATUS": status,
274
    "INSTANCE_MEMORY": memory,
275
    "INSTANCE_VCPUS": vcpus,
276
  }
277

    
278
  if nics:
279
    nic_count = len(nics)
280
    for idx, (ip, bridge, mac) in enumerate(nics):
281
      if ip is None:
282
        ip = ""
283
      env["INSTANCE_NIC%d_IP" % idx] = ip
284
      env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
285
      env["INSTANCE_NIC%d_HWADDR" % idx] = mac
286
  else:
287
    nic_count = 0
288

    
289
  env["INSTANCE_NIC_COUNT"] = nic_count
290

    
291
  return env
292

    
293

    
294
def _BuildInstanceHookEnvByObject(instance, override=None):
295
  """Builds instance related env variables for hooks from an object.
296

297
  Args:
298
    instance: objects.Instance object of instance
299
    override: dict of values to override
300
  """
301
  args = {
302
    'name': instance.name,
303
    'primary_node': instance.primary_node,
304
    'secondary_nodes': instance.secondary_nodes,
305
    'os_type': instance.os,
306
    'status': instance.os,
307
    'memory': instance.memory,
308
    'vcpus': instance.vcpus,
309
    'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
310
  }
311
  if override:
312
    args.update(override)
313
  return _BuildInstanceHookEnv(**args)
314

    
315

    
316
def _CheckInstanceBridgesExist(instance):
  """Check that the bridges needed by an instance exist.

  Queries the instance's primary node for all bridges referenced by
  the instance's NICs, raising errors.OpPrereqError if any is absent.

  """
  needed_bridges = [nic.bridge for nic in instance.nics]
  target_node = instance.primary_node
  if not rpc.call_bridges_exist(target_node, needed_bridges):
    raise errors.OpPrereqError("one or more target bridges %s does not"
                               " exist on destination node '%s'" %
                               (needed_bridges, target_node))
class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    The cluster can only be destroyed once the master is the sole
    remaining node and no instances are defined anymore.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    master_name = self.sstore.GetMasterNode()

    remaining_nodes = self.cfg.GetNodeList()
    if len(remaining_nodes) != 1 or remaining_nodes[0] != master_name:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(remaining_nodes) - 1))

    remaining_instances = self.cfg.GetInstanceList()
    if remaining_instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(remaining_instances))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_name = self.sstore.GetMasterNode()
    if not rpc.call_node_stop_master(master_name):
      raise errors.OpExecError("Could not disable the master role")
    # keep backup copies of the SSH keys before the master leaves
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    for key_path in (priv_key, pub_key):
      utils.CreateBackup(key_path)
    rpc.call_node_leave_cluster(master_name)
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]

  def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
                  remote_version, feedback_fn):
    """Run multiple tests against a node.

    Test list:
      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    Args:
      node: name of the node to check
      file_list: required list of files
      local_cksum: dictionary of local files and their checksums

    Returns True if any check failed (i.e. the node is "bad").

    """
    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    if not remote_version:
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version:
      feedback_fn("  - ERROR: sw version mismatch: master %s, node(%s) %s" %
                      (local_version, node, remote_version))
      return True

    # checks vg existence and size > 20G

    bad = False
    if not vglist:
      feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                      (node,))
      bad = True
    else:
      vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
                                            constants.MIN_VG_SIZE)
      if vgstatus:
        feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
        bad = True

    # checks config file checksum
    # checks ssh to any

    if 'filelist' not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      remote_cksum = node_result['filelist']
      for file_name in file_list:
        if file_name not in remote_cksum:
          bad = True
          feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          bad = True
          feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)

    if 'nodelist' not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result['nodelist']:
        bad = True
        # NOTE(review): this loop variable shadows the 'node' parameter;
        # nothing after the loop uses the parameter, but renaming would be
        # safer
        for node in node_result['nodelist']:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result['nodelist'][node]))
    if 'node-net-test' not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result['node-net-test']:
        bad = True
        nlist = utils.NiceSort(node_result['node-net-test'].keys())
        # NOTE(review): also shadows the 'node' parameter (see above)
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result['node-net-test'][node]))

    # a non-None 'hypervisor' entry carries the hypervisor's failure message
    hyp_result = node_result.get('hypervisor', None)
    if hyp_result is not None:
      feedback_fn("  - ERROR: hypervisor verify failure: '%s'" % hyp_result)
    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    Args:
      instance: the instance name
      instanceconfig: the instance's configuration object
      node_vol_is: dict of node -> actually present volumes
      node_instance: dict of node -> list of instances running there
      feedback_fn: function used to report errors

    Returns True if any check failed.

    """
    bad = False

    node_current = instanceconfig.primary_node

    # compute the volumes this instance should have, grouped by node
    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    # instances not configured 'down' must be running on their primary
    if not instanceconfig.status == 'down':
      if (node_current not in node_instance or
          not instance in node_instance[node_current]):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    # the instance must not be running anywhere but its primary
    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    Returns True if any orphan volume was found.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    Returns True if any orphan instance was found.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    Returns True if any node lacks the memory for its potential failovers.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          needed_mem += instance_cfg[instance].memory
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accomodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    # TODO: populate the environment with useful information for verify hooks
    env = {}
    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    Returns 0 if everything verified fine, 1 otherwise.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    # NOTE(review): 'nodeinfo' starts out as a list of node objects here but
    # is reassigned to a per-node dict inside the loop below; it is only read
    # as a list when building node_verify_param, before the loop
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    i_non_redundant = [] # Non redundant instances
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    file_names = list(self.sstore.GetFileList())
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.CLUSTER_CONF_FILE)
    local_checksums = utils.FingerprintFiles(file_names)

    # gather all remote data up-front with multi-node RPC calls
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    all_volumeinfo = rpc.call_volume_list(nodelist, vg_name)
    all_instanceinfo = rpc.call_instance_list(nodelist)
    all_vglist = rpc.call_vg_list(nodelist)
    node_verify_param = {
      'filelist': file_names,
      'nodelist': nodelist,
      'hypervisor': None,
      'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
                        for node in nodeinfo]
      }
    all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
    all_rversion = rpc.call_version(nodelist)
    all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())

    for node in nodelist:
      feedback_fn("* Verifying node %s" % node)
      result = self._VerifyNode(node, file_names, local_checksums,
                                all_vglist[node], all_nvinfo[node],
                                all_rversion[node], feedback_fn)
      bad = bad or result

      # node_volume
      volumeinfo = all_volumeinfo[node]

      # a string result means an LVM-level error message from the node
      if isinstance(volumeinfo, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, volumeinfo[-400:].encode('string_escape')))
        bad = True
        node_volume[node] = {}
      elif not isinstance(volumeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = volumeinfo

      # node_instance
      nodeinstance = all_instanceinfo[node]
      if type(nodeinstance) != list:
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
        bad = True
        continue

      node_instance[node] = nodeinstance

      # node_info
      nodeinfo = all_ninfo[node]
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "dfree": int(nodeinfo['vg_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
      except ValueError:
        feedback_fn("  - ERROR: invalid value returned from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.cfg.GetInstanceInfo(instance)
      result =  self._VerifyInstance(instance, inst_config, node_volume,
                                     node_instance, feedback_fn)
      bad = bad or result

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      else:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        else:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    return int(bad)

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result, handle it, and send some
    nicely-formatted feedback back to the user.

    Args:
      phase: the hooks phase that has just been run
      hooks_results: the results of the multi-node hooks rpc call
      feedback_fn: function to send feedback back to the caller
      lu_result: previous Exec result

    """
    # We only really run POST phase hooks, and are only interested in their
    # results; in the PRE phase lu_result is None, so falling off the end of
    # the method (implicitly returning None) is equivalent to returning it
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          if res is False or not isinstance(res, list):
            feedback_fn("    Communication failure")
            lu_result = 1
            continue
          for script, hkr, output in res:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Returns a 4-tuple of:
      - list of nodes which could not be contacted (or returned
        invalid data)
      - dict of node name -> LVM error message, for nodes where LV
        enumeration failed
      - list of names of instances with at least one offline LV
      - dict of instance name -> list of (node, volume) pairs which
        are configured but missing on the node

    """
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    # only running, network-mirrored instances are checked
    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (inst.status != "up" or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      # no relevant volumes, nothing to verify
      return result

    node_lvs = rpc.call_volume_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      lvs = node_lvs[node]

      if isinstance(lvs, basestring):
        # a string result is an LVM error message from the node
        logger.Info("error enumerating LVs on node %s: %s" % (node, lvs))
        res_nlvm[node] = lvs
        # FIX: without this continue, the string fell through to the
        # iteritems() loop below and crashed with an AttributeError
        continue
      elif not isinstance(lvs, dict):
        logger.Info("connection to node %s failed or invalid data returned" %
                    (node,))
        res_nodes.append(node)
        continue

      # every expected LV found on the node is removed from nv_dict;
      # an offline LV flags its owning instance as degraded
      for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]
  REQ_WSSTORE = True

  def BuildHooksEnv(self):
    """Build hooks env.

    Both the pre- and the post-hook run on the master node only.

    """
    master_node = self.sstore.GetMasterNode()
    env = {
      "OP_TARGET": self.sstore.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return env, [master_node], [master_node]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    Raises OpPrereqError if the new name/IP is identical to the current
    one, or if the new IP is already live on the network.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.sstore.GetClusterName()
    old_ip = self.sstore.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      # the new master IP must not be in use anywhere
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    Stops the master role, rewrites the ssconf keys, pushes them to all
    other nodes and finally restarts the master role.

    """
    clustername = self.op.name
    ip = self.ip
    ss = self.sstore

    # shutdown the master IP
    master = ss.GetMasterNode()
    if not rpc.call_node_stop_master(master):
      raise errors.OpExecError("Could not disable the master role")

    try:
      # modify the sstore
      ss.SetKey(ss.SS_MASTER_IP, ip)
      ss.SetKey(ss.SS_CLUSTER_NAME, clustername)

      # Distribute updated ss config to all nodes
      myself = self.cfg.GetNodeInfo(master)
      dist_nodes = self.cfg.GetNodeList()
      if myself.name in dist_nodes:
        dist_nodes.remove(myself.name)

      logger.Debug("Copying updated ssconf data to all nodes")
      for keyname in (ss.SS_CLUSTER_NAME, ss.SS_MASTER_IP):
        fname = ss.KeyToFilename(keyname)
        result = rpc.call_upload_file(dist_nodes, fname)
        for dest_node in dist_nodes:
          if not result[dest_node]:
            logger.Error("copy of file %s to node %s failed" %
                         (fname, dest_node))
    finally:
      # whatever happened above, try to bring the master role back
      if not rpc.call_node_start_master(master):
        logger.Error("Could not re-enable the master role on the master,"
                     " please restart manually.")
930

    
931

    
932
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  Args:
    disk: ganeti.objects.Disk object

  Returns:
    boolean indicating whether a LD_LV dev_type was found or not

  """
  # depth-first search through the disk's children, if any
  for child in (disk.children or []):
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
947

    
948

    
949
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    Both the pre- and the post-hook run on the master node only.

    """
    master_node = self.sstore.GetMasterNode()
    env = {
      "OP_TARGET": self.sstore.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    return env, [master_node], [master_node]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if not self.op.vg_name:
      # disabling LVM storage: refuse while any instance still has an
      # lvm-backed disk anywhere in its disk tree
      for iname in self.cfg.GetInstanceList():
        inst = self.cfg.GetInstanceInfo(iname)
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist")

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      node_list = self.cfg.GetNodeList()
      vglist = rpc.call_vg_list(node_list)
      for node in node_list:
        vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          # a non-empty result is the error description
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus))

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    new_vg = self.op.vg_name
    if new_vg != self.cfg.GetVGName():
      self.cfg.SetVGName(new_vg)
    else:
      feedback_fn("Cluster LVM configuration already in desired"
                  " state, not changing")
1004

    
1005

    
1006
def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False):
  """Sleep and poll for an instance's disk to sync.

  Polls the primary node for mirror status until all disks report done
  (or once only, if oneshot is set).  Returns True unless any disk ended
  up degraded; raises RemoteError after ten consecutive failed polls.

  """
  if not instance.disks:
    # nothing to wait for
    return True

  if not oneshot:
    proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    cfgw.SetDiskID(dev, node)

  failures = 0
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    if not rstats:
      proc.LogWarning("Can't get any data from node %s" % node)
      failures += 1
      if failures >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    failures = 0
    for idx, mstat in enumerate(rstats):
      if mstat is None:
        proc.LogWarning("Can't compute data for node %s/%s" %
                        (node, instance.disks[idx].iv_name))
        continue
      # we ignore the ldisk parameter
      perc_done, est_time, is_degraded, _ = mstat
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
      if perc_done is not None:
        # a non-None percentage means this device is still syncing
        done = False
        if est_time is not None:
          rem_time = "%d estimated seconds remaining" % est_time
          max_time = est_time
        else:
          rem_time = "no time estimate"
        proc.LogInfo("- device %s: %5.2f%% done, %s" %
                     (instance.disks[idx].iv_name, perc_done, rem_time))
    if done or oneshot:
      break

    if unlock:
      #utils.Unlock('cmd')
      pass
    try:
      # sleep at most a minute between polls
      time.sleep(min(60, max_time))
    finally:
      if unlock:
        #utils.Lock('cmd')
        pass

  if done:
    proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
1070

    
1071

    
1072
def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  cfgw.SetDiskID(dev, node)
  # index into the blockdev_find result tuple: 5 = is_degraded, 6 = ldisk
  if ldisk:
    degr_idx = 6
  else:
    degr_idx = 5

  result = True
  if on_primary or dev.AssembleOnSecondary():
    rstats = rpc.call_blockdev_find(node, dev)
    if not rstats:
      logger.ToStderr("Node %s: Disk degraded, not found or node down" % node)
      result = False
    else:
      result = result and (not rstats[degr_idx])
  if dev.children:
    # NOTE(review): children are recursed into with the default
    # ldisk=False rather than the caller's ldisk value — confirm this
    # is intentional
    for child in dev.children:
      result = result and _CheckDiskConsistency(cfgw, child, node, on_primary)

  return result
1099

    
1100

    
1101
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]

  def CheckPrereq(self):
    """Check prerequisites.

    This always succeeds, since this is a pure query LU.

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    self.dynamic_fields = frozenset(["name", "valid", "node_status"])
    _CheckOutputFields(static=[],
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remap a per-node result list into a per-os, per-node dictionary.

      Args:
        node_list: a list with the names of all nodes
        rlist: a map with node names as keys and OS objects as values

      Returns:
        map: a map with osnames as keys and as value another map, with
             nodes as keys and list of OS objects as values, e.g.
             {"debian-etch": {"node1": [<object>,...],
                              "node2": [<object>,]}
             }

    """
    all_os = {}
    for node_name, node_result in rlist.iteritems():
      if not node_result:
        continue
      for os_obj in node_result:
        if os_obj.name not in all_os:
          # first time we see this OS: pre-seed an empty list for
          # every node in node_list
          all_os[os_obj.name] = dict([(nname, []) for nname in node_list])
        all_os[os_obj.name][node_name].append(os_obj)
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    node_list = self.cfg.GetNodeList()
    node_data = rpc.call_os_diagnose(node_list)
    if node_data == False:
      raise errors.OpExecError("Can't gather the list of OSes")
    os_map = self._DiagnoseByOS(node_list, node_data)
    output = []
    for os_name, os_data in os_map.iteritems():
      row = []
      for field in self.op.output_fields:
        if field == "name":
          field_val = os_name
        elif field == "valid":
          # an OS is valid only if the first entry on every node is ok
          field_val = utils.all([osl and osl[0] for osl in os_data.values()])
        elif field == "node_status":
          field_val = {}
          for node_name, nos_list in os_data.iteritems():
            field_val[node_name] = [(v.status, v.path) for v in nos_list]
        else:
          raise errors.ParameterError(field)
        row.append(field_val)
      output.append(row)

    return output
1179

    
1180

    
1181
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      # FIX: use the call form of raise instead of the deprecated
      # "raise Class, args" statement form, for consistency with every
      # other raise in this module
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.sstore.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name == instance.primary_node:
        raise errors.OpPrereqError("Instance %s still running on the node,"
                                   " please remove first." % instance_name)
      if node.name in instance.secondary_nodes:
        raise errors.OpPrereqError("Instance %s has node as a secondary,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logger.Info("stopping the node daemon and removing configs from node %s" %
                node.name)

    rpc.call_node_leave_cluster(node.name)

    logger.Info("Removing node %s from config" % node.name)

    self.cfg.RemoveNode(node.name)
    # Remove the node from the Ganeti Lock Manager
    self.context.glm.remove(locking.LEVEL_NODE, node.name)

    utils.RemoveHostFromEtcHosts(node.name)
1254

    
1255

    
1256
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    # fields that need a live RPC query to the nodes
    self.dynamic_fields = frozenset([
      "dtotal", "dfree",
      "mtotal", "mnode", "mfree",
      "bootid",
      "ctotal",
      ])

    _CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
                               "pinst_list", "sinst_list",
                               "pip", "sip", "tags"],
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

    self.wanted = _GetWantedNodes(self, self.op.names)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.wanted
    nodelist = [self.cfg.GetNodeInfo(name) for name in nodenames]

    # begin data gathering

    if self.dynamic_fields.intersection(self.op.output_fields):
      # at least one live field requested: query the nodes over RPC
      live_data = {}
      node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName())
      for name in nodenames:
        nodeinfo = node_data.get(name, None)
        if nodeinfo:
          live_data[name] = {
            "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
            "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
            "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
            "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
            "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
            "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
            "bootid": nodeinfo['bootid'],
            }
        else:
          live_data[name] = {}
    else:
      # no live fields needed; the shared empty dict is read-only below
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      # map every instance to its primary/secondary nodes
      instancelist = self.cfg.GetInstanceList()

      for instance_name in instancelist:
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    # end data gathering

    output = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field == "name":
          field_val = node.name
        elif field == "pinst_list":
          field_val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          field_val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          field_val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          field_val = len(node_to_secondary[node.name])
        elif field == "pip":
          field_val = node.primary_ip
        elif field == "sip":
          field_val = node.secondary_ip
        elif field == "tags":
          field_val = list(node.GetTags())
        elif field in self.dynamic_fields:
          field_val = live_data[node.name].get(field, None)
        else:
          raise errors.ParameterError(field)
        row.append(field_val)
      output.append(row)

    return output
1358

    
1359

    
1360
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = _GetWantedNodes(self, self.op.nodes)

    _CheckOutputFields(static=["node"],
                       dynamic=["phys", "vg", "name", "size", "instance"],
                       selected=self.op.output_fields)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    # per-instance map of node -> LV names, for owner lookups below
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      if node not in volumes or not volumes[node]:
        # node unreachable or no volumes reported
        continue

      node_vols = volumes[node][:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        row = []
        for field in self.op.output_fields:
          if field == "node":
            field_val = node
          elif field == "phys":
            field_val = vol['dev']
          elif field == "vg":
            field_val = vol['vg']
          elif field == "name":
            field_val = vol['name']
          elif field == "size":
            field_val = int(float(vol['size']))
          elif field == "instance":
            # find which instance (if any) owns this LV on this node
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                field_val = inst.name
                break
            else:
              field_val = '-'
          else:
            raise errors.ParameterError(field)
          row.append(str(field_val))

        output.append(row)

    return output
1428

    
1429

    
1430
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_pre = self.cfg.GetNodeList()
    nodes_post = nodes_pre + [self.op.node_name, ]
    return env, nodes_pre, nodes_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.HostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      # single-homed node: secondary defaults to the primary
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        # on readd, the node's own entry must match its old addresses
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before")
        continue

      # neither of the new addresses may collide with any existing node
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.sstore.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one")
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one")

    # checks reachablity
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port")

    self.new_node = objects.Node(name=node,
                                 primary_ip=primary_ip,
                                 secondary_ip=secondary_ip)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # check connectivity
    result = rpc.call_version([node])[node]
    if not result:
      raise errors.OpExecError("Cannot get version from the new node")
    if constants.PROTOCOL_VERSION == result:
      logger.Info("communication to node %s fine, sw version %s match" %
                  (node, result))
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result))

    # setup ssh on node
    logger.Info("copy ssh key to node %s" % node)
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                priv_key, pub_key]

    keyarray = []
    for keyfile in keyfiles:
      f = open(keyfile, 'r')
      try:
        keyarray.append(f.read())
      finally:
        f.close()

    result = rpc.call_node_add(node, *keyarray)

    if not result:
      raise errors.OpExecError("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      # make sure the new node can actually reach its own secondary ip
      if not rpc.call_node_tcp_ping(new_node.name,
                                    constants.LOCALHOST_IP_ADDRESS,
                                    new_node.secondary_ip,
                                    constants.DEFAULT_NODED_PORT,
                                    10, False):
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.sstore.GetMasterNode()]
    node_verify_param = {
      'nodelist': [node],
      # TODO: do a node-net-test as well?
    }

    result = rpc.call_node_verify(node_verify_list, node_verify_param)
    for verifier in node_verify_list:
      if not result[verifier]:
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
                                 " for remote verification" % verifier)
      if result[verifier]['nodelist']:
        for failed in result[verifier]['nodelist']:
          feedback_fn("ssh/hostname verification failed %s -> %s" %
                      (verifier, result[verifier]['nodelist'][failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    # Distribute updated /etc/hosts and known_hosts to all nodes,
    # including the node just added
    myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode())
    dist_nodes = self.cfg.GetNodeList()
    if not self.op.readd:
      dist_nodes.append(node)
    if myself.name in dist_nodes:
      dist_nodes.remove(myself.name)

    logger.Debug("Copying hosts and known_hosts to all nodes")
    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
      result = rpc.call_upload_file(dist_nodes, fname)
      for dest_node in dist_nodes:
        if not result[dest_node]:
          logger.Error("copy of file %s to node %s failed" %
                       (fname, dest_node))

    # finally, push the ssconf files (plus the VNC password file for
    # HVM clusters) to the new node only
    to_copy = self.sstore.GetFileList()
    if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
      to_copy.append(constants.VNC_PASSWORD_FILE)
    for fname in to_copy:
      result = rpc.call_upload_file([node], fname)
      if not result[node]:
        logger.Error("could not copy file %s to node %s" % (fname, node))

    if not self.op.readd:
      logger.Info("adding node %s to cluster.conf" % node)
      self.cfg.AddNode(new_node)
      # Add the new node to the Ganeti Lock Manager
      self.context.glm.add(locking.LEVEL_NODE, node)
1632

    
1633

    
1634
class LUMasterFailover(LogicalUnit):
  """Failover the master node to the current node.

  This is a special LU in that it must run on a non-master node.

  """
  HPATH = "master-failover"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_MASTER = False
  REQ_WSSTORE = True
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the new master only in the pre phase, and on all
    the nodes in the post phase.

    """
    env = {
      "OP_TARGET": self.new_master,
      "NEW_MASTER": self.new_master,
      "OLD_MASTER": self.old_master,
      }
    return env, [self.new_master], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we are not already the master.

    """
    self.new_master = utils.HostInfo().name
    self.old_master = self.sstore.GetMasterNode()

    if self.old_master == self.new_master:
      # FIX: grammar in the user-facing message ("This commands" ->
      # "This command")
      raise errors.OpPrereqError("This command must be run on the node"
                                 " where you want the new master to be."
                                 " %s is already the master" %
                                 self.old_master)

  def Exec(self, feedback_fn):
    """Failover the master node.

    This command, when run on a non-master node, will cause the current
    master to cease being master, and the non-master to become new
    master.

    """
    #TODO: do not rely on gethostname returning the FQDN
    logger.Info("setting master to %s, old master: %s" %
                (self.new_master, self.old_master))

    if not rpc.call_node_stop_master(self.old_master):
      # FIX: the message was missing the "not" ("could disable" ->
      # "could not disable")
      logger.Error("could not disable the master role on the old master"
                   " %s, please disable manually" % self.old_master)

    # record the new master in the simple store and push the file out
    ss = self.sstore
    ss.SetKey(ss.SS_MASTER_NODE, self.new_master)
    if not rpc.call_upload_file(self.cfg.GetNodeList(),
                                ss.KeyToFilename(ss.SS_MASTER_NODE)):
      logger.Error("could not distribute the new simple store master file"
                   " to the other nodes, please check.")

    if not rpc.call_node_start_master(self.new_master):
      logger.Error("could not start the master role on the new master"
                   " %s, please check" % self.new_master)
      feedback_fn("Error in activating the master IP on the new master,"
                  " please fix manually.")
1703

    
1704

    
1705

    
1706
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_MASTER = False

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    # All data comes from the simple store or compile-time constants,
    # so no RPC calls are needed here.
    sstore = self.sstore
    arch = (platform.architecture()[0], platform.machine())
    return {
      "name": sstore.GetClusterName(),
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": constants.OS_API_VERSION,
      "export_version": constants.EXPORT_VERSION,
      "master": sstore.GetMasterNode(),
      "architecture": arch,
      "hypervisor_type": sstore.GetHypervisorType(),
      }
class LUDumpClusterConfig(NoHooksLU):
  """Return a text-representation of the cluster-config.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    # delegate entirely to the config writer's own dump routine
    config_dump = self.cfg.DumpConfig()
    return config_dump
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(full_name)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    # _AssembleInstanceDisks returns (status, device mapping list)
    disks_ok, disks_info = _AssembleInstanceDisks(self.instance, self.cfg)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  Args:
    instance: a ganeti.objects.Instance object
    ignore_secondaries: if true, errors on secondary nodes won't result
                        in an error return from the function

  Returns:
    false if the operation failed
    list of (host, instance_visible_name, node_visible_name) if the operation
         suceeded with the mapping from node devices to instance devices
  """
  device_info = []
  disks_ok = True
  iname = instance.name

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass: bring every device up in secondary mode on all of its nodes
  for top_dev in instance.disks:
    for node_name, dev in top_dev.ComputeNodeTree(instance.primary_node):
      cfg.SetDiskID(dev, node_name)
      result = rpc.call_blockdev_assemble(node_name, dev, iname, False)
      if not result:
        logger.Error("could not prepare block device %s on node %s"
                     " (is_primary=False, pass=1)" %
                     (top_dev.iv_name, node_name))
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass: switch to primary mode, but only on the primary node
  for top_dev in instance.disks:
    for node_name, dev in top_dev.ComputeNodeTree(instance.primary_node):
      if node_name != instance.primary_node:
        continue
      cfg.SetDiskID(dev, node_name)
      result = rpc.call_blockdev_assemble(node_name, dev, iname, True)
      if not result:
        logger.Error("could not prepare block device %s on node %s"
                     " (is_primary=True, pass=2)" %
                     (top_dev.iv_name, node_name))
        disks_ok = False
    device_info.append((instance.primary_node, top_dev.iv_name, result))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for top_dev in instance.disks:
    cfg.SetDiskID(top_dev, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(cfg, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, dummy = _AssembleInstanceDisks(instance, cfg,
                                           ignore_secondaries=force)
  if disks_ok:
    return
  # assembly failed: tear everything down again and bail out
  _ShutdownInstanceDisks(instance, cfg)
  if force is not None and not force:
    logger.Error("If the message above refers to a secondary node,"
                 " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    # ask the primary node for its list of running instances; a
    # non-list answer means the RPC call failed
    ins_l = rpc.call_instance_list([instance.primary_node])
    ins_l = ins_l[instance.primary_node]
    # idiom fix: use isinstance() instead of "not type(x) is list"
    if not isinstance(ins_l, list):
      raise errors.OpExecError("Can't contact node '%s'" %
                               instance.primary_node)

    # refuse to pull the disks out from under a running instance
    if self.instance.name in ins_l:
      raise errors.OpExecError("Instance is running, can't shutdown"
                               " block devices.")

    _ShutdownInstanceDisks(instance, self.cfg)
def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on secondary nodes always cause a False return value; errors
  on the primary node are ignored (only logged) when ignore_primary is
  true.  (The previous docstring stated the inverse of the code.)

  """
  result = True
  for disk in instance.disks:
    # walk the whole device tree of this disk, node by node
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      cfg.SetDiskID(top_disk, node)
      if not rpc.call_blockdev_shutdown(node, top_disk):
        logger.Error("could not shutdown block device %s on node %s" %
                     (disk.iv_name, node))
        # a failure counts unless it happened on the primary node and
        # the caller asked for primary-node errors to be ignored
        if not ignore_primary or node != instance.primary_node:
          result = False
  return result
def _CheckNodeFreeMemory(cfg, node, reason, requested):
  """Verify that a node has at least `requested` MiB of free memory.

  The node is queried over RPC; an OpPrereqError is raised if the node
  cannot be contacted, reports no usable memory information, or has
  less free memory than requested.

  Args:
    - cfg: a ConfigWriter instance
    - node: the node name
    - reason: string to use in the error message
    - requested: the amount of memory in MiB

  """
  node_data = rpc.call_node_info([node], cfg.GetVGName())
  if not node_data or not isinstance(node_data, dict):
    raise errors.OpPrereqError("Could not contact node %s for resource"
                               " information" % (node,))

  free = node_data[node].get('memory_free')
  if not isinstance(free, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free))
  if requested > free:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free))
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nodes = [self.sstore.GetMasterNode(), self.instance.primary_node]
    nodes.extend(self.instance.secondary_nodes)
    return env, nodes, nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    expanded = self.cfg.ExpandInstanceName(self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(expanded)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    # check bridges existance
    _CheckInstanceBridgesExist(instance)

    # make sure the primary node can hold the instance's memory
    _CheckNodeFreeMemory(self.cfg, instance.primary_node,
                         "starting instance %s" % instance.name,
                         instance.memory)

    self.instance = instance
    self.op.instance_name = instance.name

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance

    # record the new administrative state before contacting the node
    self.cfg.MarkInstanceUp(instance.name)

    primary = instance.primary_node

    _StartInstanceDisks(self.cfg, instance, self.op.force)

    extra_args = getattr(self.op, "extra_args", "")
    if not rpc.call_instance_start(primary, instance, extra_args):
      # the hypervisor refused to start it: release the disks again
      _ShutdownInstanceDisks(instance, self.cfg)
      raise errors.OpExecError("Could not start instance")
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that the
    requested reboot type is valid.

    """
    # validate the reboot type early, before hooks run and before any
    # work is done in Exec (previously this was only checked in Exec)
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))

    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    # check bridges existance
    _CheckInstanceBridgesExist(instance)

    self.instance = instance
    self.op.instance_name = instance.name

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    extra_args = getattr(self.op, "extra_args", "")

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboots are handled entirely by the node daemon
      if not rpc.call_instance_reboot(node_current, instance,
                                      reboot_type, extra_args):
        raise errors.OpExecError("Could not reboot instance")
    else:
      # full reboot: stop the instance, recycle its disks, start again
      if not rpc.call_instance_shutdown(node_current, instance):
        raise errors.OpExecError("could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(instance, self.cfg)
      _StartInstanceDisks(self.cfg, instance, ignore_secondaries)
      if not rpc.call_instance_start(node_current, instance, extra_args):
        _ShutdownInstanceDisks(instance, self.cfg)
        raise errors.OpExecError("Could not start instance for full reboot")

    self.cfg.MarkInstanceUp(instance.name)
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    nodes = [self.sstore.GetMasterNode(), self.instance.primary_node]
    nodes.extend(self.instance.secondary_nodes)
    return env, nodes, nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    expanded = self.cfg.ExpandInstanceName(self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(expanded)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    primary = instance.primary_node
    # record the new administrative state before contacting the node
    self.cfg.MarkInstanceDown(instance.name)
    if not rpc.call_instance_shutdown(primary, instance):
      # best-effort: log the failure but still release the disks
      logger.Error("could not shutdown instance")

    _ShutdownInstanceDisks(instance, self.cfg)
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.status != "down":
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    # double-check against the node itself: the config may be stale
    remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
    if remote_info:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # bug fix: this used to reference the non-existent
        # self.op.pnode, which would have raised AttributeError
        # instead of the intended error message
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node)
      os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
      if not os_obj:
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
                                   " primary node"  % self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.AddInstance(inst)

    _StartInstanceDisks(self.cfg, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      if not rpc.call_instance_os_add(inst.primary_node, inst, "sda", "sdb"):
        raise errors.OpExecError("Could not install OS for instance %s"
                                 " on node %s" %
                                 (inst.name, inst.primary_node))
    finally:
      # always release the disks, even if the OS installation failed
      _ShutdownInstanceDisks(inst, self.cfg)
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    and that the new name is valid and not already taken.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    if instance.status != "down":
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    # double-check against the node itself: the config may be stale
    remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
    if remote_info:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance
    old_name = inst.name

    # for file-based disks the storage directory embeds the instance
    # name and must be renamed along with the instance
    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = rpc.call_file_storage_dir_rename(inst.primary_node,
                                                old_file_storage_dir,
                                                new_file_storage_dir)

      if not result:
        raise errors.OpExecError("Could not connect to node '%s' to rename"
                                 " directory '%s' to '%s' (but the instance"
                                 " has been renamed in Ganeti)" % (
                                 inst.primary_node, old_file_storage_dir,
                                 new_file_storage_dir))

      if not result[0]:
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
                                 " (but the instance has been renamed in"
                                 " Ganeti)" % (old_file_storage_dir,
                                               new_file_storage_dir))

    _StartInstanceDisks(self.cfg, inst, None)
    try:
      if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
                                          "sda", "sdb"):
        # fixed message: "Could run" -> "Could not run"
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti)" %
               (inst.name, inst.primary_node))
        logger.Error(msg)
    finally:
      _ShutdownInstanceDisks(inst, self.cfg)
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    nl = [self.sstore.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    expanded = self.cfg.ExpandInstanceName(self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(expanded)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    pnode = instance.primary_node
    logger.Info("shutting down instance %s on node %s" %
                (instance.name, pnode))

    if not rpc.call_instance_shutdown(pnode, instance):
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (instance.name, pnode))
      feedback_fn("Warning: can't shutdown instance")

    logger.Info("removing block devices for instance %s" % instance.name)

    if not _RemoveDisks(instance, self.cfg):
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logger.Info("removing instance %s out of cluster config" % instance.name)

    self.cfg.RemoveInstance(instance.name)
    # drop the instance from the Ganeti Lock Manager as well
    self.context.glm.remove(locking.LEVEL_INSTANCE, instance.name)
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    # dynamic fields require live RPC data; static ones come from the config
    self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"])
    _CheckOutputFields(static=["name", "os", "pnode", "snodes",
                               "admin_state", "admin_ram",
                               "disk_template", "ip", "mac", "bridge",
                               "sda_size", "sdb_size", "vcpus", "tags"],
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

    self.wanted = _GetWantedInstances(self, self.op.names)

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    instance_names = self.wanted
    instance_list = [self.cfg.GetInstanceInfo(iname) for iname
                     in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])

    # only contact the nodes when a dynamic field was actually requested;
    # bad_nodes collects nodes whose RPC call failed (result == False)
    bad_nodes = []
    if self.dynamic_fields.intersection(self.op.output_fields):
      live_data = {}
      node_data = rpc.call_all_instances_info(nodes)
      for name in nodes:
        result = node_data[name]
        if result:
          live_data.update(result)
        elif result == False:
          bad_nodes.append(name)
        # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    # build one output row per instance, in the order of output_fields
    output = []
    for instance in instance_list:
      iout = []
      for field in self.op.output_fields:
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = (instance.status != "down")
        elif field == "oper_state":
          # None means "unknown" (the primary node did not answer)
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          # combined admin + operational state
          if instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.status != "down":
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.status != "down":
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "admin_ram":
          val = instance.memory
        elif field == "oper_ram":
          # "?" = node answered but gave no memory; "-" = not running
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          val = instance.nics[0].ip
        elif field == "bridge":
          val = instance.nics[0].bridge
        elif field == "mac":
          val = instance.nics[0].mac
        elif field == "sda_size" or field == "sdb_size":
          disk = instance.FindDisk(field[:3])
          if disk is None:
            val = None
          else:
            val = disk.size
        elif field == "vcpus":
          val = instance.vcpus
        elif field == "tags":
          val = list(instance.GetTags())
        else:
          raise errors.ParameterError(field)
        iout.append(val)
      output.append(iout)

    return output
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  The instance is shut down on its current primary node and restarted
  on its (network-mirrored) secondary node, which then becomes the new
  primary.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    # hooks run on the master node plus every secondary of the instance
    nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    Raises:
      errors.OpPrereqError: unknown instance, non-mirrored disk layout,
        missing target bridges, or not enough free memory on the target
      errors.ProgrammerError: mirrored template without secondary nodes

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    # failover only makes sense for disk templates with a network mirror
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.cfg, target_node, "failing over instance %s" %
                         instance.name, instance.memory)

    # check bridge existence on the failover target
    brlist = [nic.bridge for nic in instance.nics]
    if not rpc.call_bridges_exist(target_node, brlist):
      raise errors.OpPrereqError("One or more target bridges %s does not"
                                 " exist on destination node '%s'" %
                                 (brlist, target_node))

    self.instance = instance

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
        # a degraded target disk aborts failover only for running
        # instances, and only when the user did not override it
        if instance.status == "up" and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logger.Info("Shutting down instance %s on node %s" %
                (instance.name, source_node))

    if not rpc.call_instance_shutdown(source_node, instance):
      # with ignore_consistency a failed shutdown (e.g. dead node) is
      # only logged; otherwise it aborts the operation
      if self.op.ignore_consistency:
        logger.Error("Could not shutdown instance %s on node %s. Proceeding"
                     " anyway. Please make sure node %s is down"  %
                     (instance.name, source_node, source_node))
      else:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (instance.name, source_node))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    # from this point on the instance is considered to live on the
    # (old) secondary node
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.status == "up":
      feedback_fn("* activating the instance's disks on target node")
      logger.Info("Starting instance %s on node %s" %
                  (instance.name, target_node))

      disks_ok, dummy = _AssembleInstanceDisks(instance, self.cfg,
                                               ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(instance, self.cfg)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      if not rpc.call_instance_start(target_node, instance, None):
        _ShutdownInstanceDisks(instance, self.cfg)
        raise errors.OpExecError("Could not start instance %s on node %s." %
                                 (instance.name, target_node))

    
2613
def _CreateBlockDevOnPrimary(cfg, node, instance, device, info):
  """Recursively create a block device tree on the primary node.

  Unlike the secondary-node variant, every device of the tree is
  created unconditionally.

  Returns True on success, False as soon as any creation fails.

  """
  # children must exist before the parent device can be assembled
  for child in device.children or []:
    if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info):
      return False

  cfg.SetDiskID(device, node)
  new_id = rpc.call_blockdev_create(node, device, device.size,
                                    instance.name, True, info)
  if not new_id:
    return False
  # remember the node-assigned physical id, but never overwrite a
  # previously known one
  if device.physical_id is None:
    device.physical_id = new_id
  return True
    
2633

    
2634
def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info):
  """Recursively create a block device tree on a secondary node.

  A device is physically created only when its type requires a copy on
  secondaries (``CreateOnSecondary``) or when ``force`` was inherited
  from such an ancestor; otherwise the recursion merely descends to the
  children, keeping the same 'force' value.

  Returns True on success, False on the first failure.

  """
  must_create = force or device.CreateOnSecondary()

  # create (or at least visit) all children first
  children = device.children or []
  if not all(_CreateBlockDevOnSecondary(cfg, node, instance, child,
                                        must_create, info)
             for child in children):
    return False

  if not must_create:
    return True

  cfg.SetDiskID(device, node)
  new_id = rpc.call_blockdev_create(node, device, device.size,
                                    instance.name, False, info)
  if not new_id:
    return False
  # keep the first physical id the node reported
  if device.physical_id is None:
    device.physical_id = new_id
  return True

    
2662

    
2663
def _GenerateUniqueNames(cfg, exts):
2664
  """Generate a suitable LV name.
2665

2666
  This will generate a logical volume name for the given instance.
2667

2668
  """
2669
  results = []
2670
  for val in exts:
2671
    new_id = cfg.GenerateUniqueID()
2672
    results.append("%s%s" % (new_id, val))
2673
  return results
2674

    
2675

    
2676
def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name):
  """Build a complete drbd8 device, including its two LV children.

  ``names[0]`` names the data LV and ``names[1]`` the (fixed 128 MB)
  metadata LV; the drbd device itself is identified by the
  (primary, secondary, port) triple.

  """
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=(primary, secondary, port),
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)

    
2692

    
2693
def _GenerateDiskTemplate(cfg, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_sz, swap_sz,
                          file_storage_dir, file_driver):
  """Generate the entire disk layout for a given template type.

  Depending on ``template_name`` this returns either an empty list
  (diskless) or a two-disk sda/sdb layout built from plain LVs, drbd8
  devices or file-backed disks.

  Raises:
    errors.ProgrammerError: when the number of secondary nodes does not
      match the template, or the template is unknown

  """
  #TODO: compute space requirements

  vgname = cfg.GetVGName()

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")
    lv_names = _GenerateUniqueNames(cfg, [".sda", ".sdb"])
    return [objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
                         logical_id=(vgname, lv_names[0]),
                         iv_name="sda"),
            objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
                         logical_id=(vgname, lv_names[1]),
                         iv_name="sdb")]

  if template_name == constants.DT_DRBD8:
    # drbd8 needs exactly one mirror peer
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    lv_names = _GenerateUniqueNames(cfg, [".sda_data", ".sda_meta",
                                          ".sdb_data", ".sdb_meta"])
    return [_GenerateDRBD8Branch(cfg, primary_node, remote_node,
                                 disk_sz, lv_names[0:2], "sda"),
            _GenerateDRBD8Branch(cfg, primary_node, remote_node,
                                 swap_sz, lv_names[2:4], "sdb")]

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")
    return [objects.Disk(dev_type=constants.LD_FILE, size=disk_sz,
                         iv_name="sda",
                         logical_id=(file_driver,
                                     "%s/sda" % file_storage_dir)),
            objects.Disk(dev_type=constants.LD_FILE, size=swap_sz,
                         iv_name="sdb",
                         logical_id=(file_driver,
                                     "%s/sdb" % file_storage_dir))]

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

    
2743

    
2744
def _GetInstanceInfoText(instance):
2745
  """Compute that text that should be added to the disk's metadata.
2746

2747
  """
2748
  return "originstname+%s" % instance.name
2749

    
2750

    
2751
def _CreateDisks(cfg, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  Args:
    instance: the instance object

  Returns:
    True or False showing the success of the creation process

  """
  info = _GetInstanceInfoText(instance)

  # file-based instances need their storage directory created on the
  # primary node before any disk can be created in it
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = rpc.call_file_storage_dir_create(instance.primary_node,
                                              file_storage_dir)

    # a false-ish result means the RPC itself failed
    if not result:
      logger.Error("Could not connect to node '%s'" % instance.primary_node)
      return False

    # result[0] is the remote operation's success flag
    if not result[0]:
      logger.Error("failed to create directory '%s'" % file_storage_dir)
      return False

  for device in instance.disks:
    logger.Info("creating volume %s for instance %s" %
                (device.iv_name, instance.name))
    #HARDCODE
    # secondaries first: mirrored devices need their remote halves
    # before the primary can be assembled
    for secondary_node in instance.secondary_nodes:
      if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance,
                                        device, False, info):
        logger.Error("failed to create volume %s (%s) on secondary node %s!" %
                     (device.iv_name, device, secondary_node))
        return False
    #HARDCODE
    if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
                                    instance, device, info):
      logger.Error("failed to create volume %s on primary!" %
                   device.iv_name)
      return False

  return True

    
2797

    
2798
def _RemoveDisks(instance, cfg):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  Args:
    instance: the instance object

  Returns:
    True or False showing the success of the removal process

  """
  logger.Info("removing block devices for instance %s" % instance.name)

  result = True
  for device in instance.disks:
    # walk the whole device tree, visiting every node the device (or a
    # component of it) lives on
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      cfg.SetDiskID(disk, node)
      if not rpc.call_blockdev_remove(node, disk):
        # best effort: log and continue, so as much as possible is freed
        logger.Error("could not remove block device %s on node %s,"
                     " continuing anyway" %
                     (device.iv_name, node))
        result = False

  # file-based instances also need their storage directory removed
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if not rpc.call_file_storage_dir_remove(instance.primary_node,
                                            file_storage_dir):
      logger.Error("could not remove directory '%s'" % file_storage_dir)
      result = False

  return result

    
2834

    
2835
def _ComputeDiskSize(disk_template, disk_size, swap_size):
  """Compute disk size requirements in the volume group

  This is currently hard-coded for the two-drive layout.

  """
  # Required free disk space as a function of disk and swap space;
  # None means the template needs no LVM space at all.
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: disk_size + swap_size,
    # 256 MB are added for drbd metadata, 128MB for each drbd device
    constants.DT_DRBD8: disk_size + swap_size + 256,
    constants.DT_FILE: None,
  }

  try:
    return req_size_dict[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

    
2856

    
2857
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  Supports both fresh creation (INSTANCE_CREATE) and import from a
  previous export (INSTANCE_IMPORT), with optional node selection via
  an instance allocator.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mem_size", "disk_size",
              "disk_template", "swap_size", "mode", "start", "vcpus",
              "wait_for_sync", "ip_check", "mac"]

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success this sets self.op.pnode (and self.op.snode when the
    allocator returned two nodes, i.e. for mirrored templates).

    Raises:
      errors.OpPrereqError: if the allocator fails or returns an
        unexpected number of nodes

    """
    disks = [{"size": self.op.disk_size, "mode": "w"},
             {"size": self.op.swap_size, "mode": "w"}]
    nics = [{"mac": self.op.mac, "ip": getattr(self.op, "ip", None),
             "bridge": self.op.bridge}]
    ial = IAllocator(self.cfg, self.sstore,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.op.vcpus,
                     mem_size=self.op.mem_size,
                     disks=disks,
                     nics=nics,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      # FIX: the format string has three placeholders but previously
      # only two arguments were supplied, which made this error path
      # die with a TypeError instead of the intended OpPrereqError
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    logger.ToStdout("Selected nodes for the instance: %s" %
                    (", ".join(ial.nodes),))
    logger.Info("Selected nodes for instance %s via iallocator %s: %s" %
                (self.op.instance_name, self.op.iallocator, ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
      "INSTANCE_DISK_SIZE": self.op.disk_size,
      "INSTANCE_SWAP_SIZE": self.op.swap_size,
      "INSTANCE_ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["INSTANCE_SRC_NODE"] = self.op.src_node
      env["INSTANCE_SRC_PATH"] = self.op.src_path
      env["INSTANCE_SRC_IMAGE"] = self.src_image

    env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.instance_status,
      os_type=self.op.os_type,
      memory=self.op.mem_size,
      vcpus=self.op.vcpus,
      nics=[(self.inst_ip, self.op.bridge, self.op.mac)],
    ))

    nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    Validates the opcode parameters, resolves/allocates the target
    nodes and verifies node-side resources (disk space, memory, OS,
    bridges).

    """
    # set optional parameters to none if they don't exist
    for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode",
                 "iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path",
                 "vnc_bind_address"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)
      if src_node is None or src_path is None:
        raise errors.OpPrereqError("Importing an instance requires source"
                                   " node and path options")
      src_node_full = self.cfg.ExpandNodeName(src_node)
      if src_node_full is None:
        raise errors.OpPrereqError("Unknown source node '%s'" % src_node)
      self.op.src_node = src_node = src_node_full

      if not os.path.isabs(src_path):
        raise errors.OpPrereqError("The source path must be absolute")

      export_info = rpc.call_export_info(src_node, src_path)

      if not export_info:
        raise errors.OpPrereqError("No export found in dir %s" % src_path)

      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      if int(export_info.get(constants.INISECT_INS, 'disk_count')) > 1:
        raise errors.OpPrereqError("Can't import instance with more than"
                                   " one data disk")

      # FIXME: are the old os-es, disk sizes, etc. useful?
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS,
                                                         'disk0_dump'))
      self.src_image = diskimage
    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

    #### instance parameters check

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)

    self.op.instance_name = instance_name = hostname1.name
    instance_list = self.cfg.GetInstanceList()
    if instance_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    # ip validity checks; "auto" resolves the IP from the instance name
    ip = getattr(self.op, "ip", None)
    if ip is None or ip.lower() == "none":
      inst_ip = None
    elif ip.lower() == "auto":
      inst_ip = hostname1.ip
    else:
      if not utils.IsValidIP(ip):
        raise errors.OpPrereqError("given IP address '%s' doesn't look"
                                   " like a valid IP" % ip)
      inst_ip = ip
    self.inst_ip = self.op.ip = inst_ip

    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    # a reachable IP means the address is already taken
    if self.op.ip_check:
      if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname1.ip, instance_name))

    # MAC address verification
    if self.op.mac != "auto":
      if not utils.IsValidMac(self.op.mac.lower()):
        raise errors.OpPrereqError("invalid MAC address specified: %s" %
                                   self.op.mac)

    # bridge verification
    bridge = getattr(self.op, "bridge", None)
    if bridge is None:
      self.op.bridge = self.cfg.GetDefBridge()
    else:
      self.op.bridge = bridge

    # boot order verification
    if self.op.hvm_boot_order is not None:
      if len(self.op.hvm_boot_order.strip("acdn")) != 0:
        raise errors.OpPrereqError("invalid boot order specified,"
                                   " must be one or more of [acdn]")
    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory not a relative"
                                 " path")
    #### allocator run

    # exactly one of iallocator / explicit primary node must be given
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode))
    if pnode is None:
      raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                 self.op.pnode)
    self.op.pnode = pnode.name
    self.pnode = pnode
    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if getattr(self.op, "snode", None) is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")

      snode_name = self.cfg.ExpandNodeName(self.op.snode)
      if snode_name is None:
        raise errors.OpPrereqError("Unknown secondary node '%s'" %
                                   self.op.snode)
      elif snode_name == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      self.secondaries.append(snode_name)

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.op.disk_size, self.op.swap_size)

    # Check lv size requirements (req_size is None for non-LVM templates)
    if req_size is not None:
      nodenames = [pnode.name] + self.secondaries
      nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
      for node in nodenames:
        info = nodeinfo.get(node, None)
        if not info:
          raise errors.OpPrereqError("Cannot get current information"
                                     " from node '%s'" % node)
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > info['vg_free']:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, info['vg_free'], req_size))

    # os verification
    os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
    if not os_obj:
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                 " primary node"  % self.op.os_type)

    if self.op.kernel_path == constants.VALUE_NONE:
      raise errors.OpPrereqError("Can't set instance kernel to none")


    # bridge check on primary node
    if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]):
      raise errors.OpPrereqError("target bridge '%s' does not exist on"
                                 " destination node '%s'" %
                                 (self.op.bridge, pnode.name))

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self.cfg, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.op.mem_size)

    # hvm_cdrom_image_path verification
    if self.op.hvm_cdrom_image_path is not None:
      if not os.path.isabs(self.op.hvm_cdrom_image_path):
        raise errors.OpPrereqError("The path to the HVM CDROM image must"
                                   " be an absolute path or None, not %s" %
                                   self.op.hvm_cdrom_image_path)
      if not os.path.isfile(self.op.hvm_cdrom_image_path):
        raise errors.OpPrereqError("The HVM CDROM image must either be a"
                                   " regular file or a symlink pointing to"
                                   " an existing regular file, not %s" %
                                   self.op.hvm_cdrom_image_path)

    # vnc_bind_address verification
    if self.op.vnc_bind_address is not None:
      if not utils.IsValidIP(self.op.vnc_bind_address):
        raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
                                   " like a valid IP address" %
                                   self.op.vnc_bind_address)

    if self.op.start:
      self.instance_status = 'up'
    else:
      self.instance_status = 'down'

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    Creates the disks, registers the instance in the configuration and
    lock manager, waits for disk sync, installs/imports the OS and
    optionally starts the instance.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    if self.op.mac == "auto":
      mac_address = self.cfg.GenerateMAC()
    else:
      mac_address = self.op.mac

    nic = objects.NIC(bridge=self.op.bridge, mac=mac_address)
    if self.inst_ip is not None:
      nic.ip = self.inst_ip

    ht_kind = self.sstore.GetHypervisorType()
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if self.op.vnc_bind_address is None:
      self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.sstore.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self.cfg,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries, self.op.disk_size,
                                  self.op.swap_size,
                                  file_storage_dir,
                                  self.op.file_driver)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            memory=self.op.mem_size,
                            vcpus=self.op.vcpus,
                            nics=[nic], disks=disks,
                            disk_template=self.op.disk_template,
                            status=self.instance_status,
                            network_port=network_port,
                            kernel_path=self.op.kernel_path,
                            initrd_path=self.op.initrd_path,
                            hvm_boot_order=self.op.hvm_boot_order,
                            hvm_acpi=self.op.hvm_acpi,
                            hvm_pae=self.op.hvm_pae,
                            hvm_cdrom_image_path=self.op.hvm_cdrom_image_path,
                            vnc_bind_address=self.op.vnc_bind_address,
                            )

    feedback_fn("* creating instance disks...")
    if not _CreateDisks(self.cfg, iobj):
      # roll back any partially created devices
      _RemoveDisks(iobj, self.cfg)
      raise errors.OpExecError("Device creation failed, reverting...")

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Add the new instance to the Ganeti Lock Manager
    self.context.glm.add(locking.LEVEL_INSTANCE, instance)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self.cfg, iobj, self.proc)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self.cfg, iobj, self.proc, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # degraded disks: undo everything done so far
      _RemoveDisks(iobj, self.cfg)
      self.cfg.RemoveInstance(iobj.name)
      # Remove the new instance from the Ganeti Lock Manager
      self.context.glm.remove(locking.LEVEL_INSTANCE, iobj.name)
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        if not rpc.call_instance_os_add(pnode_name, iobj, "sda", "sdb"):
          raise errors.OpExecError("could not add os for instance %s"
                                   " on node %s" %
                                   (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_image = self.src_image
        if not rpc.call_instance_os_import(pnode_name, iobj, "sda", "sdb",
                                                src_node, src_image):
          raise errors.OpExecError("Could not import os for instance"
                                   " %s on node %s" %
                                   (instance, pnode_name))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      logger.Info("starting instance %s on node %s" % (instance, pnode_name))
      feedback_fn("* starting instance...")
      if not rpc.call_instance_start(pnode_name, iobj, None):
        raise errors.OpExecError("Could not start instance")

    
3286

    
3287
class LUConnectConsole(NoHooksLU):
  """Return the command needed to attach to an instance's console.

  This LU is somewhat special in that it does not act on the cluster
  itself: it only computes the ssh command line that, when run on the
  master node, connects to the instance's console.

  """
  _OP_REQP = ["instance_name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    inst = self.cfg.GetInstanceInfo(full_name)
    if inst is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = inst

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    inst = self.instance
    pnode = inst.primary_node

    # make sure the primary node answers and actually runs the instance
    running = rpc.call_instance_list([pnode])[pnode]
    if running is False:
      raise errors.OpExecError("Can't connect to node %s." % pnode)
    if inst.name not in running:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logger.Debug("connecting to console of %s on %s" % (inst.name, pnode))

    # ask the hypervisor for the local console command, then wrap it in
    # an ssh invocation targeted at the primary node
    console_cmd = hypervisor.GetHypervisor().GetShellCommandForConsole(inst)
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]

  def _RunAllocator(self):
    """Compute a new secondary node using an IAllocator.

    On success the chosen node is stored in self.op.remote_node and
    echoed to the user; on failure OpPrereqError is raised.

    """
    ial = IAllocator(self.cfg, self.sstore,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.op.instance_name,
                     relocate_from=[self.sec_node])

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      # FIX: the format string has three placeholders but the argument
      # tuple was missing the allocator name, so this branch raised
      # TypeError instead of the intended OpPrereqError
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.remote_node = ial.nodes[0]
    logger.ToStdout("Selected new secondary for the instance: %s" %
                    self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = [
      self.sstore.GetMasterNode(),
      self.instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    replacement mode against the disk template and computes the
    target/other/new nodes used by the Exec methods.

    """
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None

    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance
    self.op.instance_name = instance.name

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored.")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes))

    self.sec_node = instance.secondary_nodes[0]

    ia_name = getattr(self.op, "iallocator", None)
    if ia_name is not None:
      if self.op.remote_node is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
      # FIX: _RunAllocator stores its choice in self.op.remote_node and
      # returns None; the old "self.op.remote_node = self._RunAllocator()"
      # immediately overwrote the freshly selected node with None
      self._RunAllocator()

    remote_node = self.op.remote_node
    if remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
    else:
      self.remote_node_info = None
    if remote_node == instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")
    elif remote_node == self.sec_node:
      if self.op.mode == constants.REPLACE_DISK_SEC:
        # this is for DRBD8, where we can't execute the same mode of
        # replacement as for drbd7 (no different port allocated)
        raise errors.OpPrereqError("Same secondary given, cannot execute"
                                   " replacement")
    if instance.disk_template == constants.DT_DRBD8:
      if (self.op.mode == constants.REPLACE_DISK_ALL and
          remote_node is not None):
        # switch to replace secondary mode
        self.op.mode = constants.REPLACE_DISK_SEC

      if self.op.mode == constants.REPLACE_DISK_ALL:
        raise errors.OpPrereqError("Template 'drbd' only allows primary or"
                                   " secondary disk replacement, not"
                                   " both at once")
      elif self.op.mode == constants.REPLACE_DISK_PRI:
        if remote_node is not None:
          raise errors.OpPrereqError("Template 'drbd' does not allow changing"
                                     " the secondary while doing a primary"
                                     " node disk replacement")
        self.tgt_node = instance.primary_node
        self.oth_node = instance.secondary_nodes[0]
      elif self.op.mode == constants.REPLACE_DISK_SEC:
        self.new_node = remote_node # this can be None, in which case
                                    # we don't change the secondary
        self.tgt_node = instance.secondary_nodes[0]
        self.oth_node = instance.primary_node
      else:
        raise errors.ProgrammerError("Unhandled disk replace mode")

    for name in self.op.disks:
      if instance.FindDisk(name) is None:
        raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
                                   (name, instance.name))
    self.op.remote_node = remote_node

  def _ExecD8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for dbrd8.

    The algorithm for replace is quite complicated:
      - for each disk to be replaced:
        - create new LVs on the target node with unique names
        - detach old LVs from the drbd device
        - rename old LVs to name_replaced.<time_t>
        - rename new LVs to old LVs
        - attach the new LVs (with the old names now) to the drbd device
      - wait for sync across all devices
      - for each modified disk:
        - remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    iv_names = {}
    vgname = self.cfg.GetVGName()
    # start of work
    cfg = self.cfg
    tgt_node = self.tgt_node
    oth_node = self.oth_node

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = rpc.call_vg_list([oth_node, tgt_node])
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")
    for node in oth_node, tgt_node:
      res = results.get(node, False)
      if not res or my_vg not in res:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for dev in instance.disks:
      if not dev.iv_name in self.op.disks:
        continue
      for node in tgt_node, oth_node:
        info("checking %s on %s" % (dev.iv_name, node))
        cfg.SetDiskID(dev, node)
        if not rpc.call_blockdev_find(node, dev):
          raise errors.OpExecError("Can't find device %s on node %s" %
                                   (dev.iv_name, node))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for dev in instance.disks:
      if not dev.iv_name in self.op.disks:
        continue
      info("checking %s consistency on %s" % (dev.iv_name, oth_node))
      if not _CheckDiskConsistency(self.cfg, dev, oth_node,
                                   oth_node==instance.primary_node):
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
                                 " to replace disks on this node (%s)" %
                                 (oth_node, tgt_node))

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for dev in instance.disks:
      if not dev.iv_name in self.op.disks:
        continue
      size = dev.size
      cfg.SetDiskID(dev, tgt_node)
      lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
      names = _GenerateUniqueNames(cfg, lv_names)
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))
      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
      info("creating new local storage on %s for %s" %
           (tgt_node, dev.iv_name))
      # since we *always* want to create this LV, we use the
      # _Create...OnPrimary (which forces the creation), even if we
      # are talking about the secondary node
      for new_lv in new_lvs:
        if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv,
                                        _GetInstanceInfoText(instance)):
          raise errors.OpExecError("Failed to create new LV named '%s' on"
                                   " node '%s'" %
                                   (new_lv.logical_id[1], tgt_node))

    # Step: for each lv, detach+rename*2+attach
    self.proc.LogStep(4, steps_total, "change drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      info("detaching %s drbd from local storage" % dev.iv_name)
      if not rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
        raise errors.OpExecError("Can't detach drbd from local storage on node"
                                 " %s for device %s" % (tgt_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)
      # build the rename list based on what LVs exist on the node
      rlist = []
      for to_ren in old_lvs:
        find_res = rpc.call_blockdev_find(tgt_node, to_ren)
        if find_res is not None: # device exists
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))

      info("renaming the old LVs on the target node")
      if not rpc.call_blockdev_rename(tgt_node, rlist):
        raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
      # now we rename the new LVs to the old LVs
      info("renaming the new LVs on the target node")
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
      if not rpc.call_blockdev_rename(tgt_node, rlist):
        raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        cfg.SetDiskID(new, tgt_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        cfg.SetDiskID(disk, tgt_node)

      # now that the new lvs have the old name, we can add them to the device
      info("adding new mirror component on %s" % tgt_node)
      if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
        for new_lv in new_lvs:
          if not rpc.call_blockdev_remove(tgt_node, new_lv):
            warning("Can't rollback device %s", hint="manually cleanup unused"
                    " logical volumes")
        raise errors.OpExecError("Can't add local storage to drbd")

      dev.children = new_lvs
      cfg.Update(instance)

    # Step: wait for sync

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(cfg, instance, self.proc, unlock=True)

    # so check manually all the devices
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      cfg.SetDiskID(dev, instance.primary_node)
      is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
      if is_degr:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

    # Step: remove old storage
    self.proc.LogStep(6, steps_total, "removing old storage")
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      info("remove logical volumes for %s" % name)
      for lv in old_lvs:
        cfg.SetDiskID(lv, tgt_node)
        if not rpc.call_blockdev_remove(tgt_node, lv):
          warning("Can't remove old LV", hint="manually remove unused LVs")
          continue

  def _ExecD8Secondary(self, feedback_fn):
    """Replace the secondary node for drbd8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    iv_names = {}
    vgname = self.cfg.GetVGName()
    # start of work
    cfg = self.cfg
    old_node = self.tgt_node
    new_node = self.new_node
    pri_node = instance.primary_node

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = rpc.call_vg_list([pri_node, new_node])
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")
    for node in pri_node, new_node:
      res = results.get(node, False)
      if not res or my_vg not in res:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for dev in instance.disks:
      if not dev.iv_name in self.op.disks:
        continue
      info("checking %s on %s" % (dev.iv_name, pri_node))
      cfg.SetDiskID(dev, pri_node)
      if not rpc.call_blockdev_find(pri_node, dev):
        raise errors.OpExecError("Can't find device %s on node %s" %
                                 (dev.iv_name, pri_node))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for dev in instance.disks:
      if not dev.iv_name in self.op.disks:
        continue
      info("checking %s consistency on %s" % (dev.iv_name, pri_node))
      if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True):
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
                                 " unsafe to replace the secondary" %
                                 pri_node)

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for dev in instance.disks:
      size = dev.size
      info("adding new local storage on %s for %s" % (new_node, dev.iv_name))
      # since we *always* want to create this LV, we use the
      # _Create...OnPrimary (which forces the creation), even if we
      # are talking about the secondary node
      for new_lv in dev.children:
        if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv,
                                        _GetInstanceInfoText(instance)):
          raise errors.OpExecError("Failed to create new LV named '%s' on"
                                   " node '%s'" %
                                   (new_lv.logical_id[1], new_node))

      iv_names[dev.iv_name] = (dev, dev.children)

    self.proc.LogStep(4, steps_total, "changing drbd configuration")
    for dev in instance.disks:
      size = dev.size
      info("activating a new drbd on %s for %s" % (new_node, dev.iv_name))
      # create new devices on new_node
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=(pri_node, new_node,
                                          dev.logical_id[2]),
                              children=dev.children)
      if not _CreateBlockDevOnSecondary(cfg, new_node, instance,
                                        new_drbd, False,
                                      _GetInstanceInfoText(instance)):
        raise errors.OpExecError("Failed to create new DRBD on"
                                 " node '%s'" % new_node)

    for dev in instance.disks:
      # we have new devices, shutdown the drbd on the old secondary
      info("shutting down drbd for %s on old node" % dev.iv_name)
      cfg.SetDiskID(dev, old_node)
      if not rpc.call_blockdev_shutdown(old_node, dev):
        warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
                hint="Please cleanup this device manually as soon as possible")

    info("detaching primary drbds from the network (=> standalone)")
    done = 0
    for dev in instance.disks:
      cfg.SetDiskID(dev, pri_node)
      # set the physical (unique in bdev terms) id to None, meaning
      # detach from network
      dev.physical_id = (None,) * len(dev.physical_id)
      # and 'find' the device, which will 'fix' it to match the
      # standalone state
      if rpc.call_blockdev_find(pri_node, dev):
        done += 1
      else:
        warning("Failed to detach drbd %s from network, unusual case" %
                dev.iv_name)

    if not done:
      # no detaches succeeded (very unlikely)
      raise errors.OpExecError("Can't detach at least one DRBD from old node")

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    info("updating instance configuration")
    for dev in instance.disks:
      dev.logical_id = (pri_node, new_node) + dev.logical_id[2:]
      cfg.SetDiskID(dev, pri_node)
    cfg.Update(instance)

    # and now perform the drbd attach
    info("attaching primary drbds to new secondary (standalone => connected)")
    failures = []
    for dev in instance.disks:
      info("attaching primary drbd for %s to new secondary node" % dev.iv_name)
      # since the attach is smart, it's enough to 'find' the device,
      # it will automatically activate the network, if the physical_id
      # is correct
      cfg.SetDiskID(dev, pri_node)
      if not rpc.call_blockdev_find(pri_node, dev):
        warning("can't attach drbd %s to new secondary!" % dev.iv_name,
                "please do a gnt-instance info to see the status of disks")

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(cfg, instance, self.proc, unlock=True)

    # so check manually all the devices
    for name, (dev, old_lvs) in iv_names.iteritems():
      cfg.SetDiskID(dev, pri_node)
      is_degr = rpc.call_blockdev_find(pri_node, dev)[5]
      if is_degr:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

    self.proc.LogStep(6, steps_total, "removing old storage")
    for name, (dev, old_lvs) in iv_names.iteritems():
      info("remove logical volumes for %s" % name)
      for lv in old_lvs:
        cfg.SetDiskID(lv, old_node)
        if not rpc.call_blockdev_remove(old_node, lv):
          warning("Can't remove LV on old secondary",
                  hint="Cleanup stale volumes by hand")

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    instance = self.instance

    # Activate the instance disks if we're replacing them on a down instance
    if instance.status == "down":
      op = opcodes.OpActivateInstanceDisks(instance_name=instance.name)
      self.proc.ChainOpCode(op)

    if instance.disk_template == constants.DT_DRBD8:
      if self.op.remote_node is None:
        fn = self._ExecD8DiskOnly
      else:
        fn = self._ExecD8Secondary
    else:
      raise errors.ProgrammerError("Unhandled disk replacement case")

    ret = fn(feedback_fn)

    # Deactivate the instance disks if we're replacing them on a down instance
    if instance.status == "down":
      op = opcodes.OpDeactivateInstanceDisks(instance_name=instance.name)
      self.proc.ChainOpCode(op)

    return ret
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {"DISK": self.op.disk, "AMOUNT": self.op.amount}
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = [self.sstore.GetMasterNode(), self.instance.primary_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    expanded = self.cfg.ExpandInstanceName(self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(expanded)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance
    self.op.instance_name = instance.name

    # only lvm-backed templates (plain and drbd8) can be grown
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    if instance.FindDisk(self.op.disk) is None:
      raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
                                 (self.op.disk, instance.name))

    # every node holding a copy of the disk needs enough free VG space
    nodenames = [instance.primary_node] + list(instance.secondary_nodes)
    nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
    for node_name in nodenames:
      node_data = nodeinfo.get(node_name, None)
      if not node_data:
        raise errors.OpPrereqError("Cannot get current information"
                                   " from node '%s'" % node_name)
      free_mib = node_data.get('vg_free', None)
      if not isinstance(free_mib, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node_name)
      if self.op.amount > node_data['vg_free']:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node_name, node_data['vg_free'],
                                    self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = instance.FindDisk(self.op.disk)
    # grow on the secondaries first, then on the primary node
    for node in (instance.secondary_nodes + (instance.primary_node,)):
      self.cfg.SetDiskID(disk, node)
      reply = rpc.call_blockdev_grow(node, disk, self.op.amount)
      # the rpc must answer with a (success, payload) pair
      if not (reply and isinstance(reply, tuple) and len(reply) == 2):
        raise errors.OpExecError("grow request failed to node %s" % node)
      if not reply[0]:
        raise errors.OpExecError("grow request failed to node %s: %s" %
                                 (node, reply[1]))
    # record the new size in the configuration
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance)
class LUQueryInstanceData(NoHooksLU):
3916
  """Query runtime instance data.
3917

3918
  """
3919
  _OP_REQP = ["instances"]
3920

    
3921
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")
    if self.op.instances:
      # an explicit list was given: resolve every name, failing on
      # unknown instances
      wanted = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        instance = self.cfg.GetInstanceInfo(full_name)
        if instance is None:
          raise errors.OpPrereqError("No such instance name '%s'" % name)
        wanted.append(instance)
      self.wanted_instances = wanted
    else:
      # no filter given: query all instances known to the cluster
      self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                               for name in self.cfg.GetInstanceList()]
  def _ComputeDiskStatus(self, instance, snode, dev):
3944
    """Compute block device status.
3945

3946
    """
3947
    self.cfg.SetDiskID(dev, instance.primary_node)
3948
    dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
3949
    if dev.dev_type in constants.LDS_DRBD:
3950
      # we change the snode then (otherwise we use the one passed in)
3951
      if dev.logical_id[0] == instance.primary_node:
3952
        snode = dev.logical_id[1]
3953
      else:
3954
        snode = dev.logical_id[0]
3955

    
3956
    if snode:
3957
      self.cfg.SetDiskID(dev, snode)
3958
      dev_sstatus = rpc.call_blockdev_find(snode, dev)
3959
    else:
3960
      dev_sstatus = None
3961

    
3962
    if dev.children:
3963
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
3964
                      for child in dev.children]
3965
    else: