Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 1a05d855

History | View | Annotate | Download (191 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import sha
29
import time
30
import tempfile
31
import re
32
import platform
33
import logging
34
import copy
35

    
36
from ganeti import ssh
37
from ganeti import utils
38
from ganeti import errors
39
from ganeti import hypervisor
40
from ganeti import locking
41
from ganeti import constants
42
from ganeti import objects
43
from ganeti import opcodes
44
from ganeti import serializer
45

    
46

    
47
class LogicalUnit(object):
  """Base class for all logical units (LUs).

  A concrete LU is expected to:
    - implement ExpandNames
    - implement CheckPrereq
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine its run requirements:
        REQ_MASTER: the LU needs to run on the master node
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_MASTER = True
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Initialise the LU and validate the opcode.

    Derived classes needing extra opcode validation should override this
    constructor. The checks done here are: all attributes named in
    _OP_REQP are present (not None) on the opcode, the cluster is
    initialised, and (if REQ_MASTER is set) we are running on the master.

    """
    self.proc = processor
    self.op = op
    self.context = context
    self.cfg = context.cfg
    self.rpc = rpc
    # Locking needs, as declared to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Helper functions refuse to work unless this has been filled in
    self.recalculate_locks = {}
    self.__ssh = None
    # logging shortcuts
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo

    for required in self._OP_REQP:
      if getattr(op, required, None) is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   required)

    if not self.cfg.IsCluster():
      raise errors.OpPrereqError("Cluster not initialized yet,"
                                 " use 'gnt-cluster init' first.")
    if self.REQ_MASTER:
      master = self.cfg.GetMasterNode()
      if master != utils.HostInfo().name:
        raise errors.OpPrereqError("Commands must be run on the master"
                                   " node %s" % master)

  @property
  def ssh(self):
    """Return the SshRunner object, creating it on first access.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  def ExpandNames(self):
    """Expand names for this LU.

    Called before the opcode starts executing. Implementations should bring
    all opcode parameters to their canonical form (e.g. a short node name
    must be fully expanded once this method has returned), so that locking,
    hooks, logging, etc. can work correctly.

    Implementations must also populate self.needed_locks: a dict with lock
    levels as keys and lists of needed lock names as values. Rules:
      - Use an empty dict if you don't need any lock
      - If you don't need any lock at a particular level omit that level
      - Don't put anything for the BGL level
      - If you want all locks at a level use locking.ALL_SET as a value

    To share locks at a level (instead of acquiring them exclusively), set a
    true value (usually 1) for that level in self.share_locks. By default
    locks are not shared.

    Examples:
    # Acquire all nodes and one instance
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: ['instance1.example.tld'],
    }
    # Acquire just two nodes
    self.needed_locks = {
      locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
    }
    # Acquire no locks
    self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # Only concurrent LUs (REQ_BGL unset) are forced to implement this
    # method; LUs holding the BGL exclusively get a working default so that
    # they don't all need to be converted at the same time.
    if not self.REQ_BGL:
      raise NotImplementedError
    self.needed_locks = {} # Exclusive LUs don't need locks.

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    Most LUs can declare everything at ExpandNames time, but sometimes
    some locks can only be computed after the previous ones have been
    acquired. This hook is called just before acquiring the locks of a given
    level, after the lower levels have been acquired, and may modify
    self.needed_locks. The default implementation does nothing.

    It is only called if self.needed_locks already contains something for
    the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    Verify that everything needed to execute the LU is in place. Inter-node
    communication is allowed, but the method should be idempotent: no
    cluster or system changes are allowed.

    errors.OpPrereqError should be raised if a prerequisite is not
    fulfilled. The return value is ignored.

    Opcode parameters not yet canonicalised by ExpandNames should be
    brought to their canonical form here.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This is where the actual work is done. Failures that are expected, or
    somewhat dealt with in code, should be signalled by raising
    errors.OpExecError.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    The return value should be a three-element tuple: a dict with the
    environment used for running this LU's hook, the list of node names on
    which the hook should run before the execution, and the list of node
    names on which it should run after the execution.

    Dict keys must not carry the 'GANETI_' prefix, as this is handled by
    the hooks runner, which will also add further keys of its own. An LU
    without any environment should return an empty dict (and not None).

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    Called every time a hooks phase has been executed, so that the LU can
    alter its result based on what the hooks returned. The default
    implementation passes the previous result back unchanged; an LU may
    override it if it wants to use the local cluster hook-scripts somehow.

    Args:
      phase: the hooks phase that has just been run
      hook_results: the results of the multi-node hooks rpc call
      feedback_fn: function to send feedback back to the caller
      lu_result: the previous result this LU had, or None in the PRE phase.

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many instance LUs receive the instance name in self.op.instance_name
    and need to expand it and declare the expanded name for locking. This
    helper does both and stores the expanded name back into
    self.op.instance_name. needed_locks is initialised to a dict if that
    has not been done before.

    """
    if self.needed_locks is not None:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    else:
      self.needed_locks = {}
    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if full_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = full_name
    self.op.instance_name = full_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    To be called once one or more instances have been locked, in order to
    lock their nodes: it fills self.needed_locks[locking.LEVEL_NODE] with
    the primary (and, unless primary_only is set, secondary) nodes of the
    instances listed in self.acquired_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set. Depending on that
    value (constants.LOCKS_REPLACE or constants.LOCKS_APPEND) the computed
    nodes replace or extend the already declared node locks; the entry is
    removed from self.recalculate_locks afterwards.

    Typical usage in DeclareLocks:

    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    node_names = []
    for locked_instance in self.acquired_locks[locking.LEVEL_INSTANCE]:
      inst = self.context.cfg.GetInstanceInfo(locked_instance)
      node_names.append(inst.primary_node)
      if not primary_only:
        node_names.extend(inst.secondary_nodes)

    mode = self.recalculate_locks[locking.LEVEL_NODE]
    if mode == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = node_names
    elif mode == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(node_names)

    del self.recalculate_locks[locking.LEVEL_NODE]
312

    
313

    
314
class NoHooksLU(LogicalUnit):
  """Base class for logical units which run no hooks.

  It only pins HPATH and HTYPE to None, so that LUs without hooks can
  inherit from it instead of repeating those two lines.

  """
  HPATH = HTYPE = None
323

    
324

    
325
def _GetWantedNodes(lu, nodes):
  """Expands and validates a non-empty list of node names.

  Args:
    lu: logical unit; its cfg is used to expand the names
    nodes: non-empty list of node names (strings)

  Returns:
    the expanded node names, sorted via utils.NiceSort

  Raises:
    errors.OpPrereqError: nodes is not a list, or a name can't be expanded
    errors.ProgrammerError: nodes is an empty list

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if len(nodes) == 0:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  expanded = []
  for short_name in nodes:
    full_name = lu.cfg.ExpandNodeName(short_name)
    if full_name is None:
      raise errors.OpPrereqError("No such node name '%s'" % short_name)
    expanded.append(full_name)

  return utils.NiceSort(expanded)
347

    
348

    
349
def _GetWantedInstances(lu, instances):
  """Expands and validates a list of instance names.

  Args:
    lu: logical unit; its cfg is used to expand/list the names
    instances: list of instance names (strings); an empty list selects
      all instances known to the configuration

  Returns:
    the expanded instance names, sorted via utils.NiceSort

  Raises:
    errors.OpPrereqError: instances is not a list, or a name can't be
      expanded

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  # Nothing selected explicitly: fall back to every configured instance
  if not instances:
    return utils.NiceSort(lu.cfg.GetInstanceList())

  expanded = []
  for short_name in instances:
    full_name = lu.cfg.ExpandInstanceName(short_name)
    if full_name is None:
      raise errors.OpPrereqError("No such instance name '%s'" % short_name)
    expanded.append(full_name)

  return utils.NiceSort(expanded)
371

    
372

    
373
def _CheckOutputFields(static, dynamic, selected):
374
  """Checks whether all selected fields are valid.
375

376
  Args:
377
    static: Static fields
378
    dynamic: Dynamic fields
379

380
  """
381
  static_fields = frozenset(static)
382
  dynamic_fields = frozenset(dynamic)
383

    
384
  all_fields = static_fields | dynamic_fields
385

    
386
  if not all_fields.issuperset(selected):
387
    raise errors.OpPrereqError("Unknown output fields selected: %s"
388
                               % ",".join(frozenset(selected).
389
                                          difference(all_fields)))
390

    
391

    
392
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
393
                          memory, vcpus, nics):
394
  """Builds instance related env variables for hooks from single variables.
395

396
  Args:
397
    secondary_nodes: List of secondary nodes as strings
398
  """
399
  env = {
400
    "OP_TARGET": name,
401
    "INSTANCE_NAME": name,
402
    "INSTANCE_PRIMARY": primary_node,
403
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
404
    "INSTANCE_OS_TYPE": os_type,
405
    "INSTANCE_STATUS": status,
406
    "INSTANCE_MEMORY": memory,
407
    "INSTANCE_VCPUS": vcpus,
408
  }
409

    
410
  if nics:
411
    nic_count = len(nics)
412
    for idx, (ip, bridge, mac) in enumerate(nics):
413
      if ip is None:
414
        ip = ""
415
      env["INSTANCE_NIC%d_IP" % idx] = ip
416
      env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
417
      env["INSTANCE_NIC%d_HWADDR" % idx] = mac
418
  else:
419
    nic_count = 0
420

    
421
  env["INSTANCE_NIC_COUNT"] = nic_count
422

    
423
  return env
424

    
425

    
426
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  Args:
    lu: logical unit; its cfg provides the cluster object used to fill in
      the instance's backend parameters
    instance: objects.Instance object of instance
    override: dict of values to override; the keys are the parameter names
      of _BuildInstanceHookEnv

  Returns:
    the environment dict built by _BuildInstanceHookEnv

  """
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    # the instance's run status, not its OS name (which is 'os_type' above)
    'status': instance.status,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)
447

    
448

    
449
def _CheckInstanceBridgesExist(lu, instance):
450
  """Check that the brigdes needed by an instance exist.
451

452
  """
453
  # check bridges existance
454
  brlist = [nic.bridge for nic in instance.nics]
455
  if not lu.rpc.call_bridges_exist(instance.primary_node, brlist):
456
    raise errors.OpPrereqError("one or more target bridges %s does not"
457
                               " exist on destination node '%s'" %
458
                               (brlist, instance.primary_node))
459

    
460

    
461
class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    The cluster must be empty: the master has to be the only node left and
    no instances may be defined.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    only_master_left = len(nodelist) == 1 and nodelist[0] == master
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    The master role is stopped on the master node and backups of the
    ssh key files of the GANETI_RUNAS user are created.

    Returns:
      the name of the master node

    """
    master = self.cfg.GetMasterNode()
    stopped = self.rpc.call_node_stop_master(master, False)
    if not stopped:
      raise errors.OpExecError("Could not disable the master role")

    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    for key_file in (priv_key, pub_key):
      utils.CreateBackup(key_file)
    return master
497

    
498

    
499
class LUVerifyCluster(LogicalUnit):
500
  """Verifies the cluster status.
501

502
  """
503
  HPATH = "cluster-verify"
504
  HTYPE = constants.HTYPE_CLUSTER
505
  _OP_REQP = ["skip_checks"]
506
  REQ_BGL = False
507

    
508
  def ExpandNames(self):
    """Declare locking needs: all nodes and all instances, in shared mode.

    """
    self.needed_locks = dict.fromkeys(
      [locking.LEVEL_NODE, locking.LEVEL_INSTANCE], locking.ALL_SET)
    # every level is acquired shared
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
514

    
515
  def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
                  remote_version, feedback_fn):
    """Run multiple tests against a node.

    Test list:
      - compares ganeti version
      - checks vg existence and minimum size (constants.MIN_VG_SIZE)
      - checks config file checksum
      - checks ssh to other nodes
      - checks tcp connectivity to other nodes
      - checks the results of the hypervisor verification

    Args:
      node: name of the node to check
      file_list: required list of files
      local_cksum: dictionary of local files and their checksums
      vglist: the volume group data returned by the node (false value if
        it could not be retrieved)
      node_result: the node verify results returned by the node (false
        value if the call failed)
      remote_version: the protocol version returned by the node (false
        value if the node could not be contacted)
      feedback_fn: function used to report the problems found

    Returns:
      True if any problem was found on the node, False otherwise

    """
    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    if not remote_version:
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version:
      feedback_fn("  - ERROR: sw version mismatch: master %s, node(%s) %s" %
                      (local_version, node, remote_version))
      return True

    # checks vg existence and minimum size

    bad = False
    if not vglist:
      feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                      (node,))
      bad = True
    else:
      vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
                                            constants.MIN_VG_SIZE)
      if vgstatus:
        feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
        bad = True

    if not node_result:
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # checks config file checksum

    if 'filelist' not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      remote_cksum = node_result['filelist']
      for file_name in file_list:
        if file_name not in remote_cksum:
          bad = True
          feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          bad = True
          feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)

    # checks ssh to the other nodes; a dedicated loop variable is used so
    # that the 'node' argument is not overwritten

    if 'nodelist' not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result['nodelist']:
        bad = True
        for peer in node_result['nodelist']:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (peer, node_result['nodelist'][peer]))

    # checks tcp connectivity to the other nodes

    if 'node-net-test' not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result['node-net-test']:
        bad = True
        nlist = utils.NiceSort(node_result['node-net-test'].keys())
        for peer in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (peer, node_result['node-net-test'][peer]))

    # checks the hypervisor verification; a non-None result is a failure
    # and has to mark the node as bad, like every other reported ERROR

    hyp_result = node_result.get('hypervisor', None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          bad = True
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))
    return bad
603

    
604
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
605
                      node_instance, feedback_fn):
606
    """Verify an instance.
607

608
    This function checks to see if the required block devices are
609
    available on the instance's node.
610

611
    """
612
    bad = False
613

    
614
    node_current = instanceconfig.primary_node
615

    
616
    node_vol_should = {}
617
    instanceconfig.MapLVsByNode(node_vol_should)
618

    
619
    for node in node_vol_should:
620
      for volume in node_vol_should[node]:
621
        if node not in node_vol_is or volume not in node_vol_is[node]:
622
          feedback_fn("  - ERROR: volume %s missing on node %s" %
623
                          (volume, node))
624
          bad = True
625

    
626
    if not instanceconfig.status == 'down':
627
      if (node_current not in node_instance or
628
          not instance in node_instance[node_current]):
629
        feedback_fn("  - ERROR: instance %s not running on node %s" %
630
                        (instance, node_current))
631
        bad = True
632

    
633
    for node in node_instance:
634
      if (not node == node_current):
635
        if instance in node_instance[node]:
636
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
637
                          (instance, node))
638
          bad = True
639

    
640
    return bad
641

    
642
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
643
    """Verify if there are any unknown volumes in the cluster.
644

645
    The .os, .swap and backup volumes are ignored. All other volumes are
646
    reported as unknown.
647

648
    """
649
    bad = False
650

    
651
    for node in node_vol_is:
652
      for volume in node_vol_is[node]:
653
        if node not in node_vol_should or volume not in node_vol_should[node]:
654
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
655
                      (volume, node))
656
          bad = True
657
    return bad
658

    
659
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
660
    """Verify the list of running instances.
661

662
    This checks what instances are running but unknown to the cluster.
663

664
    """
665
    bad = False
666
    for node in node_instance:
667
      for runninginstance in node_instance[node]:
668
        if runninginstance not in instancelist:
669
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
670
                          (runninginstance, node))
671
          bad = True
672
    return bad
673

    
674
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
675
    """Verify N+1 Memory Resilience.
676

677
    Check that if one single node dies we can still start all the instances it
678
    was primary for.
679

680
    """
681
    bad = False
682

    
683
    for node, nodeinfo in node_info.iteritems():
684
      # This code checks that every node which is now listed as secondary has
685
      # enough memory to host all instances it is supposed to should a single
686
      # other node in the cluster fail.
687
      # FIXME: not ready for failover to an arbitrary node
688
      # FIXME: does not support file-backed instances
689
      # WARNING: we currently take into account down instances as well as up
690
      # ones, considering that even if they're down someone might want to start
691
      # them even in the event of a node failure.
692
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
693
        needed_mem = 0
694
        for instance in instances:
695
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
696
          if bep[constants.BE_AUTO_BALANCE]:
697
            needed_mem += bep[constants.BE_MEMORY]
698
        if nodeinfo['mfree'] < needed_mem:
699
          feedback_fn("  - ERROR: not enough memory on node %s to accomodate"
700
                      " failovers should node %s fail" % (node, prinode))
701
          bad = True
702
    return bad
703

    
704
  def CheckPrereq(self):
    """Check prerequisites.

    The list of checks to be skipped is stored as a frozenset in
    self.skip_set; all its members must be part of
    constants.VERIFY_OPTIONAL_CHECKS, otherwise errors.OpPrereqError is
    raised.

    """
    requested = frozenset(self.op.skip_checks)
    self.skip_set = requested
    all_valid = constants.VERIFY_OPTIONAL_CHECKS.issuperset(requested)
    if not all_valid:
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
714

    
715
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    Returns:
      a tuple of: empty environment, empty list of pre-phase nodes, list of
      all the nodes for the post phase

    """
    post_nodes = self.cfg.GetNodeList()
    # TODO: populate the environment with useful information for verify hooks
    return {}, [], post_nodes
726

    
727
  def Exec(self, feedback_fn):
728
    """Verify integrity of cluster, performing various test on nodes.
729

730
    """
731
    bad = False
732
    feedback_fn("* Verifying global settings")
733
    for msg in self.cfg.VerifyConfig():
734
      feedback_fn("  - ERROR: %s" % msg)
735

    
736
    vg_name = self.cfg.GetVGName()
737
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
738
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
739
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
740
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
741
    i_non_redundant = [] # Non redundant instances
742
    i_non_a_balanced = [] # Non auto-balanced instances
743
    node_volume = {}
744
    node_instance = {}
745
    node_info = {}
746
    instance_cfg = {}
747

    
748
    # FIXME: verify OS list
749
    # do local checksums
750
    file_names = []
751
    file_names.append(constants.SSL_CERT_FILE)
752
    file_names.append(constants.CLUSTER_CONF_FILE)
753
    local_checksums = utils.FingerprintFiles(file_names)
754

    
755
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
756
    all_volumeinfo = self.rpc.call_volume_list(nodelist, vg_name)
757
    all_instanceinfo = self.rpc.call_instance_list(nodelist, hypervisors)
758
    all_vglist = self.rpc.call_vg_list(nodelist)
759
    node_verify_param = {
760
      'filelist': file_names,
761
      'nodelist': nodelist,
762
      'hypervisor': hypervisors,
763
      'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
764
                        for node in nodeinfo]
765
      }
766
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
767
                                           self.cfg.GetClusterName())
768
    all_rversion = self.rpc.call_version(nodelist)
769
    all_ninfo = self.rpc.call_node_info(nodelist, self.cfg.GetVGName(),
770
                                        self.cfg.GetHypervisorType())
771

    
772
    cluster = self.cfg.GetClusterInfo()
773
    for node in nodelist:
774
      feedback_fn("* Verifying node %s" % node)
775
      result = self._VerifyNode(node, file_names, local_checksums,
776
                                all_vglist[node], all_nvinfo[node],
777
                                all_rversion[node], feedback_fn)
778
      bad = bad or result
779

    
780
      # node_volume
781
      volumeinfo = all_volumeinfo[node]
782

    
783
      if isinstance(volumeinfo, basestring):
784
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
785
                    (node, volumeinfo[-400:].encode('string_escape')))
786
        bad = True
787
        node_volume[node] = {}
788
      elif not isinstance(volumeinfo, dict):
789
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
790
        bad = True
791
        continue
792
      else:
793
        node_volume[node] = volumeinfo
794

    
795
      # node_instance
796
      nodeinstance = all_instanceinfo[node]
797
      if type(nodeinstance) != list:
798
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
799
        bad = True
800
        continue
801

    
802
      node_instance[node] = nodeinstance
803

    
804
      # node_info
805
      nodeinfo = all_ninfo[node]
806
      if not isinstance(nodeinfo, dict):
807
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
808
        bad = True
809
        continue
810

    
811
      try:
812
        node_info[node] = {
813
          "mfree": int(nodeinfo['memory_free']),
814
          "dfree": int(nodeinfo['vg_free']),
815
          "pinst": [],
816
          "sinst": [],
817
          # dictionary holding all instances this node is secondary for,
818
          # grouped by their primary node. Each key is a cluster node, and each
819
          # value is a list of instances which have the key as primary and the
820
          # current node as secondary.  this is handy to calculate N+1 memory
821
          # availability if you can only failover from a primary to its
822
          # secondary.
823
          "sinst-by-pnode": {},
824
        }
825
      except ValueError:
826
        feedback_fn("  - ERROR: invalid value returned from node %s" % (node,))
827
        bad = True
828
        continue
829

    
830
    node_vol_should = {}
831

    
832
    for instance in instancelist:
833
      feedback_fn("* Verifying instance %s" % instance)
834
      inst_config = self.cfg.GetInstanceInfo(instance)
835
      result =  self._VerifyInstance(instance, inst_config, node_volume,
836
                                     node_instance, feedback_fn)
837
      bad = bad or result
838

    
839
      inst_config.MapLVsByNode(node_vol_should)
840

    
841
      instance_cfg[instance] = inst_config
842

    
843
      pnode = inst_config.primary_node
844
      if pnode in node_info:
845
        node_info[pnode]['pinst'].append(instance)
846
      else:
847
        feedback_fn("  - ERROR: instance %s, connection to primary node"
848
                    " %s failed" % (instance, pnode))
849
        bad = True
850

    
851
      # If the instance is non-redundant we cannot survive losing its primary
852
      # node, so we are not N+1 compliant. On the other hand we have no disk
853
      # templates with more than one secondary so that situation is not well
854
      # supported either.
855
      # FIXME: does not support file-backed instances
856
      if len(inst_config.secondary_nodes) == 0:
857
        i_non_redundant.append(instance)
858
      elif len(inst_config.secondary_nodes) > 1:
859
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
860
                    % instance)
861

    
862
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
863
        i_non_a_balanced.append(instance)
864

    
865
      for snode in inst_config.secondary_nodes:
866
        if snode in node_info:
867
          node_info[snode]['sinst'].append(instance)
868
          if pnode not in node_info[snode]['sinst-by-pnode']:
869
            node_info[snode]['sinst-by-pnode'][pnode] = []
870
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
871
        else:
872
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
873
                      " %s failed" % (instance, snode))
874

    
875
    feedback_fn("* Verifying orphan volumes")
876
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
877
                                       feedback_fn)
878
    bad = bad or result
879

    
880
    feedback_fn("* Verifying remaining instances")
881
    result = self._VerifyOrphanInstances(instancelist, node_instance,
882
                                         feedback_fn)
883
    bad = bad or result
884

    
885
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
886
      feedback_fn("* Verifying N+1 Memory redundancy")
887
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
888
      bad = bad or result
889

    
890
    feedback_fn("* Other Notes")
891
    if i_non_redundant:
892
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
893
                  % len(i_non_redundant))
894

    
895
    if i_non_a_balanced:
896
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
897
                  % len(i_non_a_balanced))
898

    
899
    return not bad
900

    
901
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result, handle it, and send some
    nicely-formatted feedback back to the user.

    Args:
      phase: the hooks phase that has just been run
      hooks_results: the results of the multi-node hooks rpc call
      feedback_fn: function to send feedback back to the caller
      lu_result: previous Exec result

    Returns:
      lu_result untouched if the phase is not the POST phase or if no
      problem was found; 1 if the hooks results are empty, if the result
      of a node is not a list or if a script reported HKR_FAIL

    """
    # We only really run POST phase hooks, and are only interested in
    # their results; for any other phase the previous result is handed
    # back as is instead of being silently replaced by None
    if phase != constants.HOOKS_PHASE_POST:
      return lu_result

    # Used to change hooks' output to proper indentation
    indent_re = re.compile('^', re.M)
    feedback_fn("* Hooks Results")
    if not hooks_results:
      feedback_fn("  - ERROR: general communication failure")
      return 1

    for node_name in hooks_results:
      show_node_header = True
      res = hooks_results[node_name]
      if res is False or not isinstance(res, list):
        feedback_fn("    Communication failure")
        lu_result = 1
        continue
      for script, hkr, output in res:
        if hkr != constants.HKR_FAIL:
          continue
        # The node header is only shown once, if there are
        # failing hooks on that node
        if show_node_header:
          feedback_fn("  Node %s:" % node_name)
          show_node_header = False
        feedback_fn("    ERROR: Script %s failed, output:" % script)
        output = indent_re.sub('      ', output)
        feedback_fn("%s" % output)
        lu_result = 1

    return lu_result
942

    
943

    
944
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Ask for all node and instance locks, every level in shared mode.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances whose status is "up" and whose disk template is in
    constants.DTS_NET_MIRROR are examined.

    Args:
      feedback_fn: function to send feedback back to the caller (not
                   used by this method)

    Returns:
      a tuple of four elements:
        - list of nodes whose volume list was neither a dict nor a string
        - dict mapping node name to the error string it returned
        - list of instance names having at least one LV not online
        - dict mapping instance name to the list of (node, volume) pairs
          expected from the configuration but not reported by the nodes

    """
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      if (inst.status != "up" or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst_lvs = {}
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_volume_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      lvs = node_lvs[node]

      if isinstance(lvs, basestring):
        logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
        res_nlvm[node] = lvs
        # an error string holds no per-LV data; falling through would
        # call iteritems() on a string and abort the whole verification
        continue
      if not isinstance(lvs, dict):
        logging.warning("Connection to node %s failed or invalid data"
                        " returned", node)
        res_nodes.append(node)
        continue

      for lv_name, (_, _, lv_online) in lvs.iteritems():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
1022

    
1023

    
1024
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks are run on the master node only, both before and after.

    """
    master_node = self.cfg.GetMasterNode()
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return hooks_env, [master_node], [master_node]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, and the operation is refused if neither the
    name nor the IP would change, or if the new IP already answers on
    the node daemon port.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name, new_ip = hostname.name, hostname.ip
    self.ip = new_ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()

    ip_changed = new_ip != old_ip
    if not ip_changed and new_name == old_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if ip_changed and utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 new_ip)

    # from here on work with the resolved name
    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped, the new name and IP are stored and
    distributed, and the master role is started again no matter whether
    the update succeeded.

    """
    new_cluster_name = self.op.name
    new_master_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stopped = self.rpc.call_node_stop_master(master, False)
    if not stopped:
      raise errors.OpExecError("Could not disable the master role")

    try:
      # modify the sstore
      # TODO: sstore
      # NOTE(review): "ss" is not bound anywhere in this class; confirm
      # where the simple store object is supposed to come from
      ss.SetKey(ss.SS_MASTER_IP, new_master_ip)
      ss.SetKey(ss.SS_CLUSTER_NAME, new_cluster_name)

      # Distribute updated ss config to all nodes
      myself = self.cfg.GetNodeInfo(master)
      dist_nodes = self.cfg.GetNodeList()
      try:
        dist_nodes.remove(myself.name)
      except ValueError:
        # the master is not part of the list, nothing to exclude
        pass

      logging.debug("Copying updated ssconf data to all nodes")
      for keyname in [ss.SS_CLUSTER_NAME, ss.SS_MASTER_IP]:
        fname = ss.KeyToFilename(keyname)
        upload_result = self.rpc.call_upload_file(dist_nodes, fname)
        for to_node in dist_nodes:
          if upload_result[to_node]:
            continue
          self.LogWarning("Copy of file %s to node %s failed",
                          fname, to_node)
    finally:
      restarted = self.rpc.call_node_start_master(master, False)
      if not restarted:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually.")
1100

    
1101

    
1102
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is lvm-based.

  The children are searched depth-first before the disk itself is
  looked at.

  Args:
    disk: ganeti.objects.Disk object

  Returns:
    boolean indicating whether a LD_LV dev_type was found or not

  """
  for child in disk.children or ():
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
1117

    
1118

    
1119
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks are run on the master node only, both before and after.

    """
    master_node = self.cfg.GetMasterNode()
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    return hooks_env, [master_node], [master_node]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid. It also computes the new
    backend and hypervisor parameters and validates the latter.

    """
    vg_name = self.op.vg_name

    # FIXME: This only works because there is only one parameter that can be
    # changed or removed.
    if vg_name is not None and not vg_name:
      # an empty (but given) name means disabling lvm storage
      for inst in self.cfg.GetAllInstancesInfo().values():
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist")

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        vgstatus = utils.CheckVolumeGroupSize(vglist[node], vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus))

    cluster = self.cfg.GetClusterInfo()
    self.cluster = cluster

    # beparams changes do not need validation (we can't validate?),
    # but we still process here
    if self.op.beparams:
      current_be = cluster.beparams[constants.BEGR_DEFAULT]
      self.new_beparams = cluster.FillDict(current_be, self.op.beparams)

    # hypervisor list/parameters
    self.new_hvparams = cluster.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name in self.new_hvparams:
          self.new_hvparams[hv_name].update(hv_dict)
        else:
          self.new_hvparams[hv_name] = hv_dict

    if self.op.enabled_hypervisors is None:
      self.hv_list = cluster.enabled_hypervisors
    else:
      self.hv_list = self.op.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        params_given = self.op.hvparams and hv_name in self.op.hvparams
        enabled_given = (self.op.enabled_hypervisors and
                         hv_name in self.op.enabled_hypervisors)
        if not (params_given or enabled_given):
          continue
        # either this is a new hypervisor, or its parameters have changed
        hv_class = hypervisor.GetHypervisor(hv_name)
        hv_class.CheckParameterSyntax(hv_params)
        _CheckHVParams(self, node_list, hv_name, hv_params)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Only the parameters given in the opcode are modified; the cluster
    object is written back to the configuration in all cases.

    """
    new_vg = self.op.vg_name
    if new_vg is not None:
      if new_vg == self.cfg.GetVGName():
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
      else:
        self.cfg.SetVGName(new_vg)
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
    self.cfg.Update(self.cluster)
1227

    
1228

    
1229
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
  """Sleep and poll for an instance's disk to sync.

  Args:
    lu: the logical unit on whose behalf we execute; its cfg, rpc and
        proc attributes and its LogWarning method are used
    instance: the instance whose disks are polled on its primary node
    oneshot: if True, the status is polled a single time instead of
             waiting for the sync to finish
    unlock: not used by this function

  Returns:
    False if, in the last poll, any device was degraded without
    reporting sync progress, True otherwise (also when the instance
    has no disks)

  Raises:
    errors.RemoteError: if the node returned no data for 10 polls in a row

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  retries = 0
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    if not rstats:
      lu.LogWarning("Can't get any data from node %s", node)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    for idx, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[idx].iv_name)
        continue
      # we ignore the ldisk parameter
      perc_done, est_time, is_degraded, _ = mstat
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
      if perc_done is not None:
        done = False
        if est_time is not None:
          rem_time = "%d estimated seconds remaining" % est_time
          # the devices sync in parallel, so the wait is driven by the
          # slowest one and not by whichever happens to be listed last
          max_time = max(max_time, est_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[idx].iv_name, perc_done, rem_time))
    if done or oneshot:
      break

    # never sleep more than a minute between two polls
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
1285

    
1286

    
1287
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  The device itself is only queried when on_primary is true or when
  the device is assembled on secondary nodes; its children are checked
  recursively, always with the default (is_degraded) test.

  """
  lu.cfg.SetDiskID(dev, node)

  # position of the status flag inside the call_blockdev_find result
  status_idx = 5
  if ldisk:
    status_idx = 6

  consistent = True
  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    if rstats:
      consistent = not rstats[status_idx]
    else:
      logging.warning("Node %s: disk degraded, not found or node down", node)
      consistent = False

  for child in dev.children or ():
    # once a failure has been seen the remaining children are skipped
    consistent = consistent and _CheckDiskConsistency(lu, child, node,
                                                      on_primary)

  return consistent
1314

    
1315

    
1316
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the requested fields and ask for all node locks.

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    self.dynamic_fields = frozenset(["name", "valid", "node_status"])
    _CheckOutputFields(static=[],
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

      Args:
        node_list: a list with the names of all nodes
        rlist: a map with node names as keys and OS objects as values

      Returns:
        map: a map with osnames as keys and as value another map, with
             nodes as
             keys and list of OS objects as values
             e.g. {"debian-etch": {"node1": [<object>,...],
                                   "node2": [<object>,]}
                  }

    """
    by_os = {}
    for node_name, node_result in rlist.iteritems():
      if not node_result:
        continue
      for os_obj in node_result:
        per_node = by_os.get(os_obj.name)
        if per_node is None:
          # first time this os is seen: start with an empty list for
          # each node in node_list
          per_node = dict([(nname, []) for nname in node_list])
          by_os[os_obj.name] = per_node
        per_node[node_name].append(os_obj)
    return by_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    Returns:
      a list with one row per OS name, each row holding the values of
      the requested output fields, in the requested order

    """
    node_list = self.acquired_locks[locking.LEVEL_NODE]
    node_data = self.rpc.call_os_diagnose(node_list)
    if node_data == False:
      raise errors.OpExecError("Can't gather the list of OSes")

    rows = []
    for os_name, os_data in self._DiagnoseByOS(node_list,
                                               node_data).iteritems():
      row = []
      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          # the first entry found on every node decides
          val = utils.all([osl and osl[0] for osl in os_data.values()])
        elif field == "node_status":
          val = dict([(node_name, [(v.status, v.path) for v in nos_list])
                      for node_name, nos_list in os_data.iteritems()])
        else:
          raise errors.ParameterError(field)
        row.append(val)
      rows.append(row)

    return rows
1400

    
1401

    
1402
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    # guard the removal so that a name missing from the list does not
    # abort the hooks setup with a ValueError
    if self.op.node_name in all_nodes:
      all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signalled by raising errors.OpPrereqError.

    On success the opcode's node name is replaced by the name stored in
    the configuration and the node object is kept in self.node.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name == instance.primary_node:
        raise errors.OpPrereqError("Instance %s still running on the node,"
                                   " please remove first." % instance_name)
      if node.name in instance.secondary_nodes:
        raise errors.OpPrereqError("Instance %s has node as a secondary,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is first removed through the context, and only afterwards
    asked to leave the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    self.context.RemoveNode(node.name)

    self.rpc.call_node_leave_cluster(node.name)
1469

    
1470

    
1471
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the requested fields and compute the needed locks.

    Node locks (shared) are only requested when at least one of the
    selected fields is not a static one.

    """
    self.dynamic_fields = frozenset([
      "dtotal", "dfree",
      "mtotal", "mnode", "mfree",
      "bootid",
      "ctotal",
      ])

    self.static_fields = frozenset([
      "name", "pinst_cnt", "sinst_cnt",
      "pinst_list", "sinst_list",
      "pip", "sip", "tags",
      "serial_no",
      ])

    _CheckOutputFields(static=self.static_fields,
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    only_static = self.static_fields.issuperset(self.op.output_fields)
    self.do_locking = not only_static
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    Returns:
      a list with one row per node (in NiceSort order of the names), each
      row holding the values of the requested output fields, in the
      requested order

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted == locking.ALL_SET:
      nodenames = all_info.keys()
    else:
      # no locks are held, so the wanted nodes may have vanished meanwhile
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.dynamic_fields.intersection(self.op.output_fields):
      # output field name -> key in the node info, for the integer values
      int_fields = (
        ("mtotal", 'memory_total'),
        ("mnode", 'memory_dom0'),
        ("mfree", 'memory_free'),
        ("dtotal", 'vg_size'),
        ("dfree", 'vg_free'),
        ("ctotal", 'cpu_total'),
        )
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data.get(name, None)
        if not nodeinfo:
          live_data[name] = {}
          continue
        entry = dict([(out_name, utils.TryConvert(int, nodeinfo[key]))
                      for out_name, key in int_fields])
        entry["bootid"] = nodeinfo['bootid']
        live_data[name] = entry
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      for instance_name in self.cfg.GetInstanceList():
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    # end data gathering

    rows = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field in self.dynamic_fields:
          val = live_data[node.name].get(field, None)
        elif field == "name":
          val = node.name
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "serial_no":
          val = node.serial_no
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        else:
          raise errors.ParameterError(field)
        row.append(val)
      rows.append(row)

    return rows
1609

    
1610

    
1611
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the requested fields and ask for shared node locks.

    All nodes are locked when the opcode names no node.

    """
    _CheckOutputFields(static=["node"],
                       dynamic=["phys", "vg", "name", "size", "instance"],
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the nodes whose locks have been acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the nodes and their attributes.

    Returns:
      a list with one row per volume, each row holding the values of the
      requested output fields converted to strings; nodes without volume
      data are left out

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    # output fields which are read straight from the volume data
    vol_keys = {"phys": 'dev', "vg": 'vg', "name": 'name'}

    rows = []
    for node in nodenames:
      if node not in volumes or not volumes[node]:
        continue

      # work on a copy, sorted by physical device
      sorted_vols = volumes[node][:]
      sorted_vols.sort(key=lambda vol: vol['dev'])

      for vol in sorted_vols:
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field in vol_keys:
            val = vol[vol_keys[field]]
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # the first instance having this volume on this node wins
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        rows.append(row)

    return rows
1688

    
1689

    
1690
class LUAddNode(LogicalUnit):
  """Logical unit that adds (or re-adds) a node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The first node list returned is the current cluster node list; the
    second one additionally contains the node being added.

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    current_nodes = self.cfg.GetNodeList()
    nodes_with_new = current_nodes + [target]
    return env, current_nodes, nodes_with_new

  def CheckPrereq(self):
    """Check prerequisites.

    The following is verified:
     - the node name resolves and the secondary IP is valid
     - the node is not yet in the config (or, for a re-add, is in the
       config with the same IP addresses)
     - its IP addresses do not clash with those of another node
     - it is single/dual homed like the master node
     - it answers a TCP ping on the node daemon port

    Any errors are signalled by raising errors.OpPrereqError.

    """
    resolved = utils.HostInfo(self.op.node_name)

    new_name = resolved.name
    pip = resolved.ip
    self.op.primary_ip = pip
    sip = getattr(self.op, "secondary_ip", None)
    if sip is None:
      # no secondary IP given: the node is treated as single-homed
      sip = pip
    if not utils.IsValidIP(sip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = sip

    known_nodes = self.cfg.GetNodeList()
    readd = self.op.readd
    already_known = new_name in known_nodes
    if readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   new_name)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 new_name)

    new_ips = (pip, sip)
    for other_name in known_nodes:
      other = self.cfg.GetNodeInfo(other_name)

      if readd and new_name == other_name:
        # a re-added node must come back with its previous addresses
        if (other.primary_ip, other.secondary_ip) != new_ips:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before")
        continue

      if other.primary_ip in new_ips or other.secondary_ip in new_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % other.name)

    # the new node must be single- or dual-homed exactly like the master
    master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_single = master.secondary_ip == master.primary_ip
    new_single = sip == pip
    if master_single and not new_single:
      raise errors.OpPrereqError("The master has no private ip but the"
                                 " new node has one")
    if new_single and not master_single:
      raise errors.OpPrereqError("The master has a private ip but the"
                                 " new node doesn't have one")

    # check reachability on the primary address
    if not utils.TcpPing(pip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    # for dual-homed nodes also check the path between the secondary IPs
    if not new_single and not utils.TcpPing(sip,
                                            constants.DEFAULT_NODED_PORT,
                                            source=master.secondary_ip):
      raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                 " based ping to noded port")

    self.new_node = objects.Node(name=new_name,
                                 primary_ip=pip,
                                 secondary_ip=sip)

  def Exec(self, feedback_fn):
    """Add the new node to the cluster.

    Checks the protocol version of the node, sends it the ssh keys,
    verifies it from the master, distributes the hosts/known_hosts
    files and finally registers the node in the cluster context.

    """
    new_node = self.new_node
    name = new_node.name

    # check connectivity and protocol compatibility
    remote_version = self.rpc.call_version([name])[name]
    if not remote_version:
      raise errors.OpExecError("Cannot get version from the new node")
    if constants.PROTOCOL_VERSION != remote_version:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, remote_version))
    logging.info("Communication to node %s fine, sw version %s match",
                 name, remote_version)

    # setup ssh on node
    logging.info("Copy ssh key to node %s", name)
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    key_paths = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                 priv_key, pub_key]

    key_data = []
    for path in key_paths:
      handle = open(path, 'r')
      try:
        key_data.append(handle.read())
      finally:
        handle.close()

    # the six key contents are passed in the order of key_paths
    keys_sent = self.rpc.call_node_add(name, *key_data)
    if not keys_sent:
      raise errors.OpExecError("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      has_sip = self.rpc.call_node_has_ip_address(new_node.name,
                                                  new_node.secondary_ip)
      if not has_sip:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    verifiers = [self.cfg.GetMasterNode()]
    verify_what = {
      'nodelist': [name],
      # TODO: do a node-net-test as well?
    }

    verify_result = self.rpc.call_node_verify(verifiers, verify_what,
                                              self.cfg.GetClusterName())
    for verifier in verifiers:
      answer = verify_result[verifier]
      if not answer:
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
                                 " for remote verification" % verifier)
      failures = answer['nodelist']
      if failures:
        for failed in failures:
          feedback_fn("ssh/hostname verification failed %s -> %s" %
                      (verifier, failures[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    # Distribute updated /etc/hosts and known_hosts to all nodes,
    # including the node just added
    master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
    targets = self.cfg.GetNodeList()
    if not self.op.readd:
      targets.append(name)
    if master.name in targets:
      targets.remove(master.name)

    logging.debug("Copying hosts and known_hosts to all nodes")
    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
      upload = self.rpc.call_upload_file(targets, fname)
      for to_node in targets:
        if not upload[to_node]:
          logging.error("Copy of file %s to node %s failed", fname, to_node)

    extra_files = []
    if constants.HT_XEN_HVM in self.cfg.GetClusterInfo().enabled_hypervisors:
      extra_files.append(constants.VNC_PASSWORD_FILE)
    for fname in extra_files:
      upload = self.rpc.call_upload_file([name], fname)
      if not upload[name]:
        logging.error("Could not copy file %s to node %s", fname, name)

    if self.op.readd:
      self.context.ReaddNode(new_node)
    else:
      self.context.AddNode(new_node)
1889

    
1890

    
1891
class LUQueryClusterInfo(NoHooksLU):
  """Query the cluster configuration.

  """
  _OP_REQP = []
  REQ_MASTER = False
  REQ_BGL = False

  def ExpandNames(self):
    """Declare that this LU needs no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return the cluster information.

    @return: dict with the version constants, the architecture of the
        machine running this code and selected cluster config values

    """
    info = self.cfg.GetClusterInfo()
    arch = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": constants.OS_API_VERSION,
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": info.cluster_name,
      "master": info.master_node,
      "default_hypervisor": info.default_hypervisor,
      "enabled_hypervisors": info.enabled_hypervisors,
      "hvparams": info.hvparams,
      "beparams": info.beparams,
      }
1929

    
1930

    
1931
class LUQueryConfigValues(NoHooksLU):
  """Return selected configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare no locks and validate the requested output fields.

    """
    self.needed_locks = {}

    known_fields = ["cluster_name", "master_node", "drain_flag"]
    _CheckOutputFields(static=known_fields,
                       dynamic=[],
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the values of the requested fields.

    @return: list of values, in the order of C{self.op.output_fields}
    @raise errors.ParameterError: if an unknown field is requested

    """
    # field name -> callable computing its value; the callables are only
    # invoked for the fields actually requested
    getters = {
      "cluster_name": self.cfg.GetClusterName,
      "master_node": self.cfg.GetMasterNode,
      "drain_flag": lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      }

    answer = []
    for field in self.op.output_fields:
      if field not in getters:
        raise errors.ParameterError(field)
      answer.append(getters[field]())
    return answer
1968

    
1969

    
1970
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up the disks of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Request the locks of the instance's nodes at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object from the configuration and asserts that
    it exists.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by _AssembleInstanceDisks
    @raise errors.OpExecError: if the disks could not be assembled

    """
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
2005

    
2006

    
2007
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2008
  """Prepare the block devices for an instance.
2009

2010
  This sets up the block devices on all nodes.
2011

2012
  Args:
2013
    instance: a ganeti.objects.Instance object
2014
    ignore_secondaries: if true, errors on secondary nodes won't result
2015
                        in an error return from the function
2016

2017
  Returns:
2018
    false if the operation failed
2019
    list of (host, instance_visible_name, node_visible_name) if the operation
2020
         suceeded with the mapping from node devices to instance devices
2021
  """
2022
  device_info = []
2023
  disks_ok = True
2024
  iname = instance.name
2025
  # With the two passes mechanism we try to reduce the window of
2026
  # opportunity for the race condition of switching DRBD to primary
2027
  # before handshaking occured, but we do not eliminate it
2028

    
2029
  # The proper fix would be to wait (with some limits) until the
2030
  # connection has been made and drbd transitions from WFConnection
2031
  # into any other network-connected state (Connected, SyncTarget,
2032
  # SyncSource, etc.)
2033

    
2034
  # 1st pass, assemble on all nodes in secondary mode
2035
  for inst_disk in instance.disks:
2036
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2037
      lu.cfg.SetDiskID(node_disk, node)
2038
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2039
      if not result:
2040
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2041
                           " (is_primary=False, pass=1)",
2042
                           inst_disk.iv_name, node)
2043
        if not ignore_secondaries:
2044
          disks_ok = False
2045

    
2046
  # FIXME: race condition on drbd migration to primary
2047

    
2048
  # 2nd pass, do only the primary node
2049
  for inst_disk in instance.disks:
2050
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2051
      if node != instance.primary_node:
2052
        continue
2053
      lu.cfg.SetDiskID(node_disk, node)
2054
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2055
      if not result:
2056
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2057
                           " (is_primary=True, pass=2)",
2058
                           inst_disk.iv_name, node)
2059
        disks_ok = False
2060
    device_info.append((instance.primary_node, inst_disk.iv_name, result))
2061

    
2062
  # leave the disks configured for the primary node
2063
  # this is a workaround that would be fixed better by
2064
  # improving the logical/physical id handling
2065
  for disk in instance.disks:
2066
    lu.cfg.SetDiskID(disk, instance.primary_node)
2067

    
2068
  return disks_ok, device_info
2069

    
2070

    
2071
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled with C{force} used as the ignore_secondaries
  flag.  If that fails they are shut down again and an error is raised;
  when C{force} is a false value other than None, a hint about
  '--force' is logged first.

  @raise errors.OpExecError: if the disks could not be assembled

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if disks_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a secondary"
                       " node, you can retry the operation using"
                       " '--force'.")
  raise errors.OpExecError("Disk consistency error")
2084

    
2085

    
2086
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shut down the disks of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Request the locks of the instance's nodes at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object from the configuration and asserts that
    it exists.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks, unless the instance is found running.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
2118

    
2119

    
2120
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance, if it is not running.

  The primary node is asked for its list of running instances; only if
  the instance is not in that list _ShutdownInstanceDisks is called.

  @raise errors.OpExecError: if the node's answer is not a list, or if
      the instance is running

  """
  primary = instance.primary_node
  per_node = lu.rpc.call_instance_list([primary], [instance.hypervisor])
  running = per_node[primary]

  # anything but a plain list is treated as a failed call
  if type(running) is not list:
    raise errors.OpExecError("Can't contact node '%s'" %
                             instance.primary_node)

  if instance.name in running:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)
2139

    
2140

    
2141
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2142
  """Shutdown block devices of an instance.
2143

2144
  This does the shutdown on all nodes of the instance.
2145

2146
  If the ignore_primary is false, errors on the primary node are
2147
  ignored.
2148

2149
  """
2150
  result = True
2151
  for disk in instance.disks:
2152
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2153
      lu.cfg.SetDiskID(top_disk, node)
2154
      if not lu.rpc.call_blockdev_shutdown(node, top_disk):
2155
        logging.error("Could not shutdown block device %s on node %s",
2156
                      disk.iv_name, node)
2157
        if not ignore_primary or node != instance.primary_node:
2158
          result = False
2159
  return result
2160

    
2161

    
2162
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor):
2163
  """Checks if a node has enough free memory.
2164

2165
  This function check if a given node has the needed amount of free
2166
  memory. In case the node has less memory or we cannot get the
2167
  information from the node, this function raise an OpPrereqError
2168
  exception.
2169

2170
  @type lu: C{LogicalUnit}
2171
  @param lu: a logical unit from which we get configuration data
2172
  @type node: C{str}
2173
  @param node: the node to check
2174
  @type reason: C{str}
2175
  @param reason: string to use in the error message
2176
  @type requested: C{int}
2177
  @param requested: the amount of memory in MiB to check for
2178
  @type hypervisor: C{str}
2179
  @param hypervisor: the hypervisor to ask for memory stats
2180
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2181
      we cannot check the node
2182

2183
  """
2184
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor)
2185
  if not nodeinfo or not isinstance(nodeinfo, dict):
2186
    raise errors.OpPrereqError("Could not contact node %s for resource"
2187
                             " information" % (node,))
2188

    
2189
  free_mem = nodeinfo[node].get('memory_free')
2190
  if not isinstance(free_mem, int):
2191
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2192
                             " was '%s'" % (node, free_mem))
2193
  if requested > free_mem:
2194
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2195
                             " needed %s MiB, available %s MiB" %
2196
                             (node, reason, requested, free_mem))
2197

    
2198

    
2199
class LUStartupInstance(LogicalUnit):
  """Start an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Request the locks of the instance's nodes at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode(), self.instance.primary_node]
    hook_nodes = hook_nodes + list(self.instance.secondary_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance from the configuration, then checks that its
    bridges exist and that the primary node has enough free memory.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    memory_reason = "starting instance %s" % instance.name
    _CheckNodeFreeMemory(self, instance.primary_node, memory_reason,
                         filled_be[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked up in the configuration before its disks
    and the instance itself are started.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    extra_args = getattr(self.op, "extra_args", "")

    self.cfg.MarkInstanceUp(instance.name)

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, self.op.force)

    started = self.rpc.call_instance_start(pnode, instance, extra_args)
    if started:
      return

    # undo the disk activation before reporting the failure
    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance")
2266

    
2267

    
2268
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the reboot type and lock the instance.

    @raise errors.ParameterError: if the reboot type is not one of the
        soft, hard or full reboot constants

    """
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Request the node locks needed for the chosen reboot type.

    Only a full reboot shuts down and restarts the instance's disks
    (see Exec); soft and hard reboots only issue a call to the primary
    node, so for those only the primary node is locked.

    """
    if level == locking.LEVEL_NODE:
      # Compare the requested type with the constant; negating the
      # constant itself would ignore the requested type altogether
      primary_only = self.op.reboot_type != constants.INSTANCE_REBOOT_FULL
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance from the configuration and checks that its
    bridges exist.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are delegated to the primary node.  A full
    reboot shuts the instance and its disks down and starts both again.
    On success the instance is marked up in the configuration.

    @raise errors.OpExecError: if any step of the reboot fails

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    extra_args = getattr(self.op, "extra_args", "")

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      if not self.rpc.call_instance_reboot(node_current, instance,
                                           reboot_type, extra_args):
        raise errors.OpExecError("Could not reboot instance")
    else:
      if not self.rpc.call_instance_shutdown(node_current, instance):
        raise errors.OpExecError("could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      if not self.rpc.call_instance_start(node_current, instance, extra_args):
        # don't leave the disks active if the instance did not come up
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for full reboot")

    self.cfg.MarkInstanceUp(instance.name)
2347

    
2348

    
2349
class LUShutdownInstance(LogicalUnit):
  """Shut down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Request the locks of the instance's nodes at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)

    hook_nodes = [self.cfg.GetMasterNode(), self.instance.primary_node]
    hook_nodes = hook_nodes + list(self.instance.secondary_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object from the configuration and asserts that
    it exists.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked down in the configuration first; a failed
    shutdown call is only logged as a warning and the disks are shut
    down in any case.

    """
    instance = self.instance
    pnode = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)

    stopped = self.rpc.call_instance_shutdown(pnode, instance)
    if not stopped:
      self.proc.LogWarning("Could not shutdown instance")

    _ShutdownInstanceDisks(self, instance)
2399

    
2400

    
2401
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Request the locks of the instance's nodes at the node level.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks, is
    marked down and is not running on its primary node.  If a new OS
    type was requested, it is also looked up on the primary node.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.status != "down":
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    if remote_info:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    # os_type is optional; normalize a missing attribute to None
    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # report the node we actually looked up; this LU does not set or
        # read a "pnode" attribute on the opcode anywhere else
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node)
      os_obj = self.rpc.call_os_get(pnode.name, self.op.os_type)
      if not os_obj:
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
                                   " primary node"  % self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was given it is stored in the configuration first.
    The disks are started, the OS create scripts are run on the primary
    node and the disks are shut down again, whether or not the
    installation succeeded.

    @raise errors.OpExecError: if the OS installation fails

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      if not self.rpc.call_instance_os_add(inst.primary_node, inst,
                                           "sda", "sdb"):
        raise errors.OpExecError("Could not install OS for instance %s"
                                 " on node %s" %
                                 (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
2490

    
2491

    
2492
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  The instance must be marked down and must not be reported as running
  on its primary node; the new name must not already be in the
  configuration's instance list.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: tuple of (environment, node list, node list); the same
        node list object is returned in both node positions

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that the new name is not already used by an instance and, unless
    the opcode has a true 'ignore_ip' attribute, that the IP of the new
    name does not answer a TCP ping on the node daemon port.

    On success self.instance is set and self.op.new_name is replaced by
    the name attribute of utils.HostInfo for the requested name.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    if instance.status != "down":
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    if remote_info:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration, the instance-level
    lock is moved from the old to the new name, the storage directory of
    file-based instances is renamed on the primary node and finally the
    OS rename call is made with the instance disks started. A failure
    of the OS rename call is only logged as a warning.

    @param feedback_fn: not used by this method
    @raise errors.OpExecError: if the file storage directory of a
        file-based instance could not be renamed

    """
    inst = self.instance
    # remember the old name before the configuration is touched
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # The saved old_name is used on purpose: if the configuration renamed
    # the very object we hold, inst.name would already be the new name and
    # the wrong lock would be removed.
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)

      # a false result is reported as a connection problem, a false first
      # element as a failed rename
      if not result:
        raise errors.OpExecError("Could not connect to node '%s' to rename"
                                 " directory '%s' to '%s' (but the instance"
                                 " has been renamed in Ganeti)" % (
                                 inst.primary_node, old_file_storage_dir,
                                 new_file_storage_dir))

      if not result[0]:
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
                                 " (but the instance has been renamed in"
                                 " Ganeti)" % (old_file_storage_dir,
                                               new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      if not self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                               old_name):
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti)" %
               (inst.name, inst.primary_node))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)
2597

    
2598

    
2599
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  REQ_BGL = False
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]

  def ExpandNames(self):
    """Lock the instance and request recalculation of the node locks.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains only the master node and is returned in
    both node positions.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    master_only = [self.cfg.GetMasterNode()]
    return hook_env, master_only, master_only

  def CheckPrereq(self):
    """Check prerequisites.

    This reads the instance from the configuration and stores it in
    self.instance; it is expected to exist.

    """
    locked_instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = locked_instance
    assert locked_instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down on its primary node, its disks are
    removed and it is deleted from the configuration. With a true
    'ignore_failures' in the opcode, shutdown and disk removal
    failures are only reported through feedback_fn.

    @param feedback_fn: callable used to report warnings
    @raise errors.OpExecError: if the shutdown or the disk removal
        fails and failures are not ignored

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s",
                 inst.name, pnode)

    shutdown_ok = self.rpc.call_instance_shutdown(pnode, inst)
    if not shutdown_ok:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (inst.name, pnode))
      feedback_fn("Warning: can't shutdown instance")

    logging.info("Removing block devices for instance %s", inst.name)

    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    # record the instance lock to be removed as well
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
2664

    
2665

    
2666
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the requested fields and compute the needed locks.

    Instance and node locks are only requested when at least one of the
    requested output fields is not in self.static_fields; both levels
    are flagged in self.share_locks.

    """
    self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"])
    hvp = ["hv/%s" % name for name in constants.HVS_PARAMETERS]
    bep = ["be/%s" % name for name in constants.BES_PARAMETERS]
    self.static_fields = frozenset([
      "name", "os", "pnode", "snodes",
      "admin_state", "admin_ram",
      "disk_template", "ip", "mac", "bridge",
      "sda_size", "sdb_size", "vcpus", "tags",
      "network_port", "beparams",
      "serial_no", "hypervisor", "hvparams",
      ] + hvp + bep)

    _CheckOutputFields(static=self.static_fields,
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    # an empty name list means "all instances"
    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = not self.static_fields.issuperset(self.op.output_fields)
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks, but only when locking is in use.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is checked here.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    @param feedback_fn: not used by this method
    @return: a list with one entry per instance, ordered by
        utils.NiceSort of the instance names; each entry is the list
        of values for self.op.output_fields, in the same order
    @raise errors.OpExecError: if no locks are used and some of the
        explicitly requested instances are no longer in the
        configuration
    @raise errors.ParameterError: for a field that is not handled

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.do_locking:
      instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
    elif self.wanted != locking.ALL_SET:
      instance_names = self.wanted
      missing = set(instance_names).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some instances were removed before retrieving their data: %s"
          % missing)
    else:
      instance_names = all_info.keys()

    instance_names = utils.NiceSort(instance_names)
    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    if self.dynamic_fields.intersection(self.op.output_fields):
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result:
          live_data.update(result)
        elif result == False:
          # a result equal to False marks the node as bad (presumably a
          # failed call - confirm against the rpc layer)
          bad_nodes.append(name)
        # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    # the cluster object does not depend on the instance, so it is looked
    # up once instead of twice per instance
    cluster = self.cfg.GetClusterInfo()
    output = []
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance)
      i_be = cluster.FillBE(instance)
      for field in self.op.output_fields:
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = (instance.status != "down")
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          # combines the configured status with the live data
          if instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.status != "down":
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.status != "down":
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          val = instance.nics[0].ip
        elif field == "bridge":
          val = instance.nics[0].bridge
        elif field == "mac":
          val = instance.nics[0].mac
        elif field == "sda_size" or field == "sdb_size":
          # the first three characters of the field are the disk name
          disk = instance.FindDisk(field[:3])
          if disk is None:
            val = None
          else:
            val = disk.size
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "serial_no":
          val = instance.serial_no
        elif field == "network_port":
          val = instance.network_port
        elif field == "hypervisor":
          val = instance.hypervisor
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        else:
          raise errors.ParameterError(field)
        iout.append(val)
      output.append(iout)

    return output
2839

    
2840

    
2841
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  REQ_BGL = False
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]

  def ExpandNames(self):
    """Lock the instance and request recalculation of the node locks.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master node followed by the secondary
    nodes of the instance and is returned in both node positions.

    """
    hook_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.secondary_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This reads the instance from the configuration (it is expected to
    exist) and verifies that its disk template is network mirrored,
    that the first secondary node passes the free memory check and that
    the bridges of all instance NICs exist on that node.

    @raise errors.OpPrereqError: if the disk template is not network
        mirrored or the bridge check fails
    @raise errors.ProgrammerError: if the instance has no secondary
        node

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    filled_be = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    snodes = inst.secondary_nodes
    if not snodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = snodes[0]
    # check memory requirements on the secondary node
    reason = "failing over instance %s" % inst.name
    _CheckNodeFreeMemory(self, target_node, reason,
                         filled_be[constants.BE_MEMORY], inst.hypervisor)

    # check bridge existence
    bridges = [nic.bridge for nic in inst.nics]
    bridges_ok = self.rpc.call_bridges_exist(target_node, bridges)
    if not bridges_ok:
      raise errors.OpPrereqError("One or more target bridges %s does not"
                                 " exist on destination node '%s'" %
                                 (bridges, target_node))

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @param feedback_fn: callable used to report progress messages
    @raise errors.OpExecError: if a disk is degraded on the target node
        (for an instance marked up, unless consistency is ignored), if
        the shutdown fails (unless consistency is ignored), or if the
        disks or the instance cannot be brought up on the target node

    """
    inst = self.instance

    source_node = inst.primary_node
    target_node = inst.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for disk in inst.disks:
      # for drbd, these are drbd over lvm
      consistent = _CheckDiskConsistency(self, disk, target_node, False)
      if (not consistent and inst.status == "up" and
          not self.op.ignore_consistency):
        raise errors.OpExecError("Disk %s is degraded on target node,"
                                 " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, source_node)

    stopped = self.rpc.call_instance_shutdown(source_node, inst)
    if not stopped:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (inst.name, source_node))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding"
                           " anyway. Please make sure node %s is down",
                           inst.name, source_node, source_node)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    inst.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(inst)

    # Only start the instance if it's marked as up
    if inst.status != "up":
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 inst.name, target_node)

    disks_ok, dummy = _AssembleInstanceDisks(self, inst,
                                             ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    started = self.rpc.call_instance_start(target_node, inst, None)
    if not started:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s." %
                               (inst.name, target_node))
2964

    
2965

    
2966
def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
2967
  """Create a tree of block devices on the primary node.
2968

2969
  This always creates all devices.
2970

2971
  """
2972
  if device.children:
2973
    for child in device.children:
2974
      if not _CreateBlockDevOnPrimary(lu, node, instance, child, info):
2975
        return False
2976

    
2977
  lu.cfg.SetDiskID(device, node)
2978
  new_id = lu.rpc.call_blockdev_create(node, device, device.size,
2979
                                       instance.name, True, info)
2980
  if not new_id:
2981
    return False
2982
  if device.physical_id is None:
2983
    device.physical_id = new_id
2984
  return True
2985

    
2986

    
2987
def _CreateBlockDevOnSecondary(lu, node, instance, device, force, info):
2988
  """Create a tree of block devices on a secondary node.
2989

2990
  If this device type has to be created on secondaries, create it and
2991
  all its children.
2992

2993
  If not, just recurse to children keeping the same 'force' value.
2994

2995
  """
2996
  if device.CreateOnSecondary():
2997
    force = True
2998
  if device.children:
2999
    for child in device.children:
3000
      if not _CreateBlockDevOnSecondary(lu, node, instance,
3001
                                        child, force, info):
3002
        return False
3003

    
3004
  if not force:
3005
    return True
3006
  lu.cfg.SetDiskID(device, node)
3007
  new_id = lu.rpc.call_blockdev_create(node, device, device.size,
3008
                                       instance.name, False, info)
3009
  if not new_id:
3010
    return False
3011
  if device.physical_id is None:
3012
    device.physical_id = new_id
3013
  return True
3014

    
3015

    
3016
def _GenerateUniqueNames(lu, exts):
3017
  """Generate a suitable LV name.
3018

3019
  This will generate a logical volume name for the given instance.
3020

3021
  """
3022
  results = []
3023
  for val in exts:
3024
    new_id = lu.cfg.GenerateUniqueID()
3025
    results.append("%s%s" % (new_id, val))
3026
  return results
3027

    
3028

    
3029
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port, the volume group name and a shared secret are requested from
  the configuration. The children are a data LV of the given size and
  a meta LV of size 128, named after names[0] and names[1].

  @param lu: the logical unit providing the cfg object
  @param primary: first element of the drbd logical id
  @param secondary: second element of the drbd logical id
  @param size: size of the drbd device and of its data LV
  @param names: sequence holding the data and meta LV names
  @param iv_name: the iv_name of the drbd device
  @param p_minor: minor stored for the primary in the logical id
  @param s_minor: minor stored for the secondary in the logical id
  @return: the drbd8 objects.Disk

  """
  config = lu.cfg
  port = config.AllocatePort()
  vgname = config.GetVGName()
  shared_secret = config.GenerateDRBDSecret()

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))
  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
3048

    
3049

    
3050
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_sz, swap_sz,
                          file_storage_dir, file_driver):
  """Generate the entire disk layout for a given template type.

  Except for the diskless template, the layout always consists of an
  "sda" disk of size disk_sz and an "sdb" disk of size swap_sz.

  @param lu: the logical unit providing the cfg object
  @param template_name: one of the disk template constants
  @param instance_name: passed to the DRBD minor allocation
  @param primary_node: the primary node of the instance
  @param secondary_nodes: must be empty for the plain and file
      templates and hold exactly one node for drbd8
  @param disk_sz: size of the "sda" disk
  @param swap_sz: size of the "sdb" disk
  @param file_storage_dir: directory of the file-based disks
  @param file_driver: first element of the file disks' logical id
  @return: list of objects.Disk (empty for the diskless template)
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".sda", ".sdb"])
    layout = []
    for idx, (iv_name, size) in enumerate([("sda", disk_sz),
                                           ("sdb", swap_sz)]):
      layout.append(objects.Disk(dev_type=constants.LD_LV, size=size,
                                 logical_id=(vgname, lv_names[idx]),
                                 iv_name=iv_name))
    return layout

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors on each node, one per disk
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, primary_node, remote_node, remote_node], instance_name)
    (minor_pa, minor_pb, minor_sa, minor_sb) = minors

    lv_names = _GenerateUniqueNames(lu, [".sda_data", ".sda_meta",
                                         ".sdb_data", ".sdb_meta"])
    sda_branch = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk_sz, lv_names[0:2], "sda",
                                      minor_pa, minor_sa)
    sdb_branch = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      swap_sz, lv_names[2:4], "sdb",
                                      minor_pb, minor_sb)
    return [sda_branch, sdb_branch]

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    layout = []
    for iv_name, size, path_fmt in [("sda", disk_sz, "%s/sda"),
                                    ("sdb", swap_sz, "%s/sdb")]:
      file_id = (file_driver, path_fmt % file_storage_dir)
      layout.append(objects.Disk(dev_type=constants.LD_FILE, size=size,
                                 iv_name=iv_name, logical_id=file_id))
    return layout

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
3105

    
3106

    
3107
def _GetInstanceInfoText(instance):
3108
  """Compute that text that should be added to the disk's metadata.
3109

3110
  """
3111
  return "originstname+%s" % instance.name
3112

    
3113

    
3114
def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  For file-based instances the storage directory is created on the
  primary node first. Every disk is then created on all secondary
  nodes before it is created on the primary node; the first failure
  aborts the whole process.

  Args:
    lu: the logical unit providing the rpc object
    instance: the instance object

  Returns:
    True or False showing the success of the creation process

  """
  info = _GetInstanceInfoText(instance)

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(instance.primary_node,
                                                     storage_dir)

    # a false result is logged as a connection problem, a false first
    # element as a failed directory creation
    if not dir_result:
      logging.error("Could not connect to node '%s'", instance.primary_node)
      return False

    if not dir_result[0]:
      logging.error("Failed to create directory '%s'", storage_dir)
      return False

  for disk in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    #HARDCODE
    for snode in instance.secondary_nodes:
      on_secondary = _CreateBlockDevOnSecondary(lu, snode, instance,
                                                disk, False, info)
      if not on_secondary:
        logging.error("Failed to create volume %s (%s) on secondary node %s!",
                      disk.iv_name, disk, snode)
        return False
    #HARDCODE
    on_primary = _CreateBlockDevOnPrimary(lu, instance.primary_node,
                                          instance, disk, info)
    if not on_primary:
      logging.error("Failed to create volume %s on primary!", disk.iv_name)
      return False

  return True
3158

    
3159

    
3160
def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  Unlike `_CreateDisks()`, a failed removal does not stop the process:
  the failure is recorded and the remaining devices are still removed.
  For file-based instances the storage directory is removed from the
  primary node afterwards.

  Args:
    lu: the logical unit providing the cfg, rpc and proc objects
    instance: the instance object

  Returns:
    True or False showing the success of the removal process

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_removed = True
  for top_device in instance.disks:
    for node, disk in top_device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      if lu.rpc.call_blockdev_remove(node, disk):
        continue
      lu.proc.LogWarning("Could not remove block device %s on node %s,"
                         " continuing anyway", top_device.iv_name, node)
      all_removed = False

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_removed = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                      storage_dir)
    if not dir_removed:
      logging.error("Could not remove directory '%s'", storage_dir)
      all_removed = False

  return all_removed
3194

    
3195

    
3196
def _ComputeDiskSize(disk_template, disk_size, swap_size):
  """Compute disk size requirements in the volume group

  This is currently hard-coded for the two-drive layout.

  @param disk_template: one of the disk template constants
  @param disk_size: size of the data disk
  @param swap_size: size of the swap disk
  @return: the required size, or None for the diskless and file
      templates
  @raise errors.ProgrammerError: for a template without an entry

  """
  both_disks = disk_size + swap_size
  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: both_disks,
    # 256 MB are added for drbd metadata, 128MB for each drbd device
    constants.DT_DRBD8: both_disks + 256,
    constants.DT_FILE: None,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
3216

    
3217

    
3218
def _CheckHVParams(lu, nodenames, hvname, hvparams):
3219
  """Hypervisor parameter validation.
3220

3221
  This function abstract the hypervisor parameter validation to be
3222
  used in both instance create and instance modify.
3223

3224
  @type lu: L{LogicalUnit}
3225
  @param lu: the logical unit for which we check
3226
  @type nodenames: list
3227
  @param nodenames: the list of nodes on which we should check
3228
  @type hvname: string
3229
  @param hvname: the name of the hypervisor we should use
3230
  @type hvparams: dict
3231
  @param hvparams: the parameters which we need to check
3232
  @raise errors.OpPrereqError: if the parameters are not valid
3233

3234
  """
3235
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
3236
                                                  hvname,
3237
                                                  hvparams)
3238
  for node in nodenames:
3239
    info = hvinfo.get(node, None)
3240
    if not info or not isinstance(info, (tuple, list)):
3241
      raise errors.OpPrereqError("Cannot get current information"
3242
                                 " from node '%s' (%s)" % (node, info))
3243
    if not info[0]:
3244
      raise errors.OpPrereqError("Hypervisor parameter validation failed:"
3245
                                 " %s" % info[1])
3246

    
3247

    
3248
class LUCreateInstance(LogicalUnit):
3249
  """Create an instance.
3250

3251
  """
3252
  HPATH = "instance-add"
3253
  HTYPE = constants.HTYPE_INSTANCE
3254
  _OP_REQP = ["instance_name", "disk_size",
3255
              "disk_template", "swap_size", "mode", "start",
3256
              "wait_for_sync", "ip_check", "mac",
3257
              "hvparams", "beparams"]
3258
  REQ_BGL = False
3259

    
3260
  def _ExpandNode(self, node):
    """Resolve one node name through the configuration.

    Returns the expanded name, or raises errors.OpPrereqError when the
    configuration cannot expand it.

    """
    expanded = self.cfg.ExpandNodeName(node)
    if expanded is not None:
      return expanded
    raise errors.OpPrereqError("Unknown node %s" % node)
3268

    
3269
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Runs the cheap validation of the opcode parameters and figures out
    the right locks for instance creation: the new instance name is
    registered in add_locks, and the node-level locks are either all
    nodes (when an iallocator is used) or the explicitly given
    primary/secondary nodes, plus the source node when importing.

    @raise errors.OpPrereqError: if any opcode parameter is invalid

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist; the file
    # storage ones are included because they are read unconditionally
    # further down and in Exec
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
                 "file_driver", "file_storage_dir"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)

    # fill and remember the beparams dict
    self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # ip validity checks
    ip = getattr(self.op, "ip", None)
    if ip is None or ip.lower() == "none":
      inst_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      inst_ip = hostname1.ip
    else:
      if not utils.IsValidIP(ip):
        raise errors.OpPrereqError("given IP address '%s' doesn't look"
                                   " like a valid IP" % ip)
      inst_ip = ip
    self.inst_ip = self.op.ip = inst_ip
    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # MAC address verification
    if self.op.mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      if not utils.IsValidMac(self.op.mac.lower()):
        raise errors.OpPrereqError("invalid MAC address specified: %s" %
                                   self.op.mac)

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    # The directory is later joined below the cluster-wide file storage
    # directory (see Exec); an absolute path would make os.path.join
    # discard that prefix, hence only relative paths are accepted.
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must not be"
                                 " absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_node is None or src_path is None:
        raise errors.OpPrereqError("Importing an instance requires source"
                                   " node and path options")

      if not os.path.isabs(src_path):
        raise errors.OpPrereqError("The source path must be absolute")

      self.op.src_node = src_node = self._ExpandNode(src_node)
      # when all nodes are locked anyway there is no list to extend
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE].append(src_node)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")
3393

    
3394
  def _RunAllocator(self):
    """Ask the iallocator for the node(s) of the new instance.

    The request is built from the opcode (disk and swap size, NIC,
    template, OS) and the filled backend parameters. On success the
    first returned node is stored as primary and, when two nodes are
    required, the second one as secondary.

    @raise errors.OpPrereqError: if the allocator fails or returns an
        unexpected number of nodes

    """
    disk_specs = [{"size": size, "mode": "w"}
                  for size in (self.op.disk_size, self.op.swap_size)]
    nic_spec = {
      "mac": self.op.mac,
      "ip": getattr(self.op, "ip", None),
      "bridge": self.op.bridge,
      }
    allocator = IAllocator(self,
                           mode=constants.IALLOCATOR_MODE_ALLOC,
                           name=self.op.instance_name,
                           disk_template=self.op.disk_template,
                           tags=[],
                           os=self.op.os_type,
                           vcpus=self.be_full[constants.BE_VCPUS],
                           mem_size=self.be_full[constants.BE_MEMORY],
                           disks=disk_specs,
                           nics=[nic_spec],
                           )

    allocator.Run(self.op.iallocator)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           allocator.info))

    chosen = allocator.nodes
    wanted = allocator.required_nodes
    if len(chosen) != wanted:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(chosen), wanted))

    self.op.pnode = chosen[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(chosen))
    if wanted == 2:
      self.op.snode = chosen[1]
3431

    
3432
  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master, the primary and the secondary nodes of
    the instance; the same node list is returned for both hook phases.

    """
    importing = self.op.mode == constants.INSTANCE_IMPORT

    env = {}
    env["INSTANCE_DISK_TEMPLATE"] = self.op.disk_template
    env["INSTANCE_DISK_SIZE"] = self.op.disk_size
    env["INSTANCE_SWAP_SIZE"] = self.op.swap_size
    env["INSTANCE_ADD_MODE"] = self.op.mode
    if importing:
      # import-only details about where the data comes from
      env["INSTANCE_SRC_NODE"] = self.op.src_node
      env["INSTANCE_SRC_PATH"] = self.op.src_path
      env["INSTANCE_SRC_IMAGES"] = self.src_images

    instance_env = _BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.instance_status,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=[(self.inst_ip, self.op.bridge, self.op.mac)],
      )
    env.update(instance_env)

    hook_nodes = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return env, hook_nodes, hook_nodes
3462

    
3463

    
3464
  def CheckPrereq(self):
    """Check prerequisites.

    In import mode the export is read and validated first (version,
    disk count, disk images, optionally the MAC of the first NIC).
    Afterwards the IP and bridge settings are checked, the iallocator
    is run if one was given, and the selected nodes are verified for
    free disk space, hypervisor parameters, OS availability, bridge
    existence and, when the instance is to be started, free memory.

    """
    if not (self.cfg.GetVGName() or
            self.op.disk_template in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_dir = self.op.src_path
      export_info = self.rpc.call_export_info(self.op.src_node, export_dir)

      if not export_info:
        raise errors.OpPrereqError("No export found in dir %s" % export_dir)

      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      found_version = export_info.get(constants.INISECT_EXP, 'version')
      if int(found_version) != constants.EXPORT_VERSION:
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (found_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      # TODO: substitute "2" with the actual number of disks requested
      wanted_disks = 2
      exported_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if exported_disks > wanted_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (wanted_disks, exported_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')

      # one entry per exported disk: the dump path, or False if the
      # export has no dump for that index
      images = []
      for idx in range(exported_disks):
        dump_key = 'disk%d_dump' % idx
        if not export_info.has_option(constants.INISECT_INS, dump_key):
          images.append(False)
          continue
        # FIXME: are the old os-es, disk sizes, etc. useful?
        dump_name = export_info.get(constants.INISECT_INS, dump_key)
        images.append(os.path.join(export_dir, dump_name))
      self.src_images = images

      if self.op.mac == constants.VALUE_AUTO:
        exported_name = export_info.get(constants.INISECT_INS, 'name')
        # FIXME: adjust every nic, when we'll be able to create instances
        # with more than one
        if (self.op.instance_name == exported_name and
            int(export_info.get(constants.INISECT_INS, 'nic_count')) >= 1):
          self.op.mac = export_info.get(constants.INISECT_INS, 'nic_0_mac')

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if (self.op.ip_check and
        utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT)):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (self.check_ip, self.op.instance_name))

    # bridge verification: fall back to the cluster default
    bridge = getattr(self.op, "bridge", None)
    if bridge is None:
      bridge = self.cfg.GetDefBridge()
    self.op.bridge = bridge

    #### allocator run
    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      mirror = self.op.snode
      if mirror is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if mirror == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      self.secondaries.append(mirror)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.op.disk_size, self.op.swap_size)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo.get(node, None)
        if not info:
          raise errors.OpPrereqError("Cannot get current information"
                                     " from node '%s'" % node)
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    if not self.rpc.call_os_get(pnode.name, self.op.os_type):
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                 " primary node"  % self.op.os_type)

    # bridge check on primary node
    if not self.rpc.call_bridges_exist(pnode.name, [self.op.bridge]):
      raise errors.OpPrereqError("target bridge '%s' does not exist on"
                                 " destination node '%s'" %
                                 (self.op.bridge, pnode.name))

    if self.op.start:
      # memory check on primary node
      _CheckNodeFreeMemory(self, pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)
      self.instance_status = 'up'
    else:
      self.instance_status = 'down'
3611

    
3612
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps are: build the NIC and disk objects, create the disks,
    add the instance to the configuration, wait for (or check) the
    disk sync, run the OS create or import scripts and finally start
    the instance if requested.

    @param feedback_fn: callable used to report progress messages
    @raise errors.OpExecError: if disk creation, disk sync, OS creation
        or instance startup fails

    """
    inst_name = self.op.instance_name
    primary = self.pnode.name

    mac_address = self.op.mac
    if mac_address in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac_address = self.cfg.GenerateMAC()

    nic = objects.NIC(bridge=self.op.bridge, mac=mac_address)
    if self.inst_ip is not None:
      nic.ip = self.inst_ip

    network_port = None
    if self.op.hypervisor in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()

    # this is needed because os.path.join does not accept None arguments
    subdir = self.op.file_storage_dir
    if subdir is None:
      subdir = ""

    # build the full file storage dir path
    joined_dir = os.path.join(self.cfg.GetFileStorageDir(), subdir, inst_name)
    file_storage_dir = os.path.normpath(joined_dir)

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  inst_name, primary,
                                  self.secondaries, self.op.disk_size,
                                  self.op.swap_size,
                                  file_storage_dir,
                                  self.op.file_driver)

    iobj = objects.Instance(name=inst_name, os=self.op.os_type,
                            primary_node=primary,
                            nics=[nic], disks=disks,
                            disk_template=self.op.disk_template,
                            status=self.instance_status,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    if not _CreateDisks(self, iobj):
      _RemoveDisks(self, iobj)
      self.cfg.ReleaseDRBDMinors(inst_name)
      raise errors.OpExecError("Device creation failed, reverting...")

    feedback_fn("adding instance %s to cluster config" % inst_name)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Remove the temp. assignements for the instance's drbds
    self.cfg.ReleaseDRBDMinors(inst_name)

    disk_abort = False
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (inst_name, primary))

    if iobj.disk_template != constants.DT_DISKLESS:
      mode = self.op.mode
      if mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        if not self.rpc.call_instance_os_add(primary, iobj):
          raise errors.OpExecError("could not add os for instance %s"
                                   " on node %s" %
                                   (inst_name, primary))

      elif mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_images = self.src_images
        import_result = self.rpc.call_instance_os_import(
          primary, iobj, self.op.src_node, src_images,
          self.cfg.GetClusterName())
        # a failed disk import is only reported, it does not abort
        for idx, imported in enumerate(import_result):
          if imported:
            continue
          self.LogWarning("Could not image %s for on instance %s, disk %d,"
                          " on node %s" % (src_images[idx], inst_name, idx,
                                           primary))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % mode)

    if self.op.start:
      logging.info("Starting instance %s on node %s", inst_name, primary)
      feedback_fn("* starting instance...")
      if not self.rpc.call_instance_start(primary, iobj, None):
        raise errors.OpExecError("Could not start instance")
3735

    
3736

    
3737
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This LU does not open the console itself: it returns the command
  line which has to be run on the master node in order to connect to
  the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks the instance up in the configuration and remembers it.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Build the command for connecting to the instance's console.

    @return: the ssh command line, as built by self.ssh.BuildCmd
    @raise errors.OpExecError: if the primary node cannot be queried
        or the instance is not in its list of instances

    """
    inst = self.instance
    pnode = inst.primary_node

    listing = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = listing[pnode]
    # False (as opposed to an empty list) is treated as a failed query
    if running is False:
      raise errors.OpExecError("Can't connect to node %s." % pnode)

    if inst.name not in running:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_obj = hypervisor.GetHypervisor(inst.hypervisor)
    console_cmd = hv_obj.GetShellCommandForConsole(inst)

    # build ssh cmdline
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
3783

    
3784

    
3785
class LUReplaceDisks(LogicalUnit):
3786
  """Replace the disks of an instance.
3787

3788
  """
3789
  HPATH = "mirrors-replace"
3790
  HTYPE = constants.HTYPE_INSTANCE
3791
  _OP_REQP = ["instance_name", "mode", "disks"]
3792
  REQ_BGL = False
3793

    
3794
  def ExpandNames(self):
    """Lock the instance and declare the node-level locks.

    With an iallocator all nodes are locked. With an explicit new
    secondary, that node is locked and the lock list is marked for
    appending. Otherwise the list starts empty and is marked for
    replacement.

    @raise errors.OpPrereqError: if both an iallocator and a new
        secondary are given, or the new secondary is unknown

    """
    self._ExpandAndLockInstance()

    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    new_secondary = self.op.remote_node

    if getattr(self.op, "iallocator", None) is not None:
      if new_secondary is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      return

    if new_secondary is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
      return

    expanded = self.cfg.ExpandNodeName(new_secondary)
    if expanded is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                 self.op.remote_node)
    self.op.remote_node = expanded
    self.needed_locks[locking.LEVEL_NODE] = [expanded]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3817

    
3818
  def DeclareLocks(self, level):
    """Declare the instance's nodes when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    # If we're already locking all nodes there is nothing to add;
    # otherwise the instance's primary/secondary nodes must be declared.
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()
3824

    
3825
  def _RunAllocator(self):
    """Compute a new secondary node using an IAllocator.

    The allocator is run in relocation mode, moving away from the
    current secondary (self.sec_node); the first returned node is
    stored in the opcode as the new remote node.

    @raise errors.OpPrereqError: if the allocator fails or returns an
        unexpected number of nodes

    """
    ial = IAllocator(self,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.op.instance_name,
                     relocate_from=[self.sec_node])

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      # the message has three placeholders: allocator name, number of
      # nodes returned and number of nodes required
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.remote_node = ial.nodes[0]
    self.LogInfo("Selected new secondary for the instance: %s",
                 self.op.remote_node)
3847

    
3848
  def BuildHooksEnv(self):
    """Build hooks env.

    The returned node list holds the master, the instance's primary
    node and, when one is set, the new secondary; it is used for both
    hook phases.

    """
    new_secondary = self.op.remote_node

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = new_secondary
    env["OLD_SECONDARY"] = self.instance.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode(), self.instance.primary_node]
    if new_secondary is not None:
      hook_nodes.append(new_secondary)
    return env, hook_nodes, hook_nodes
3867

    
3868
  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the instance is network mirrored with exactly one
    secondary, runs the iallocator if one was given, validates the new
    secondary against the current nodes and, for drbd8, derives the
    target/other nodes from the replacement mode. Finally every
    requested disk name must exist on the instance.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    self.instance = inst

    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored.")

    secondary_count = len(inst.secondary_nodes)
    if secondary_count != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 secondary_count)

    self.sec_node = inst.secondary_nodes[0]

    if getattr(self.op, "iallocator", None) is not None:
      self._RunAllocator()

    remote_node = self.op.remote_node
    if remote_node is None:
      self.remote_node_info = None
    else:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == inst.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")
    if (remote_node == self.sec_node and
        self.op.mode == constants.REPLACE_DISK_SEC):
      # this is for DRBD8, where we can't execute the same mode of
      # replacement as for drbd7 (no different port allocated)
      raise errors.OpPrereqError("Same secondary given, cannot execute"
                                 " replacement")

    if inst.disk_template == constants.DT_DRBD8:
      if (remote_node is not None and
          self.op.mode == constants.REPLACE_DISK_ALL):
        # switch to replace secondary mode
        self.op.mode = constants.REPLACE_DISK_SEC

      mode = self.op.mode
      if mode == constants.REPLACE_DISK_ALL:
        raise errors.OpPrereqError("Template 'drbd' only allows primary or"
                                   " secondary disk replacement, not"
                                   " both at once")
      elif mode == constants.REPLACE_DISK_PRI:
        if remote_node is not None:
          raise errors.OpPrereqError("Template 'drbd' does not allow changing"
                                     " the secondary while doing a primary"
                                     " node disk replacement")
        self.tgt_node = inst.primary_node
        self.oth_node = inst.secondary_nodes[0]
      elif mode == constants.REPLACE_DISK_SEC:
        # this can be None, in which case we don't change the secondary
        self.new_node = remote_node
        self.tgt_node = inst.secondary_nodes[0]
        self.oth_node = inst.primary_node
      else:
        raise errors.ProgrammerError("Unhandled disk replace mode")

    for disk_name in self.op.disks:
      if inst.FindDisk(disk_name) is not None:
        continue
      raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
                                 (disk_name, inst.name))
3939

    
3940
  def _ExecD8DiskOnly(self, feedback_fn):
3941
    """Replace a disk on the primary or secondary for dbrd8.
3942

3943
    The algorithm for replace is quite complicated:
3944
      - for each disk to be replaced:
3945
        - create new LVs on the target node with unique names
3946
        - detach old LVs from the drbd device
3947
        - rename old LVs to name_replaced.<time_t>
3948
        - rename new LVs to old LVs
3949
        - attach the new LVs (with the old names now) to the drbd device
3950
      - wait for sync across all devices
3951
      - for each modified disk:
3952
        - remove old LVs (which have the name name_replaces.<time_t>)
3953

3954
    Failures are not very well handled.
3955

3956
    """
3957
    steps_total = 6
3958
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
3959
    instance = self.instance
3960
    iv_names = {}
3961
    vgname = self.cfg.GetVGName()
3962
    # start of work
3963
    cfg = self.cfg
3964
    tgt_node = self.tgt_node
3965
    oth_node = self.oth_node
3966

    
3967
    # Step: check device activation
3968
    self.proc.LogStep(1, steps_total, "check device existence")
3969
    info("checking volume groups")
3970
    my_vg = cfg.GetVGName()
3971
    results = self.rpc.call_vg_list([oth_node, tgt_node])
3972
    if not results:
3973
      raise errors.OpExecError("Can't list volume groups on the nodes")
3974
    for node in oth_node, tgt_node:
3975
      res = results.get(node, False)
3976
      if not res or my_vg not in res:
3977
        raise errors.OpExecError("Volume group '%s' not found on %s" %
3978
                                 (my_vg, node))
3979
    for dev in instance.disks:
3980
      if not dev.iv_name in self.op.disks:
3981
        continue
3982
      for node in tgt_node, oth_node:
3983
        info("checking %s on %s" % (dev.iv_name, node))
3984
        cfg.SetDiskID(dev, node)
3985
        if not self.rpc.call_blockdev_find(node, dev):
3986
          raise errors.OpExecError("Can't find device %s on node %s" %
3987
                                   (dev.iv_name, node))
3988

    
3989
    # Step: check other node consistency
3990
    self.proc.LogStep(2, steps_total, "check peer consistency")
3991
    for dev in instance.disks:
3992
      if not dev.iv_name in self.op.disks:
3993
        continue
3994
      info("checking %s consistency on %s" % (dev.iv_name, oth_node))
3995
      if not _CheckDiskConsistency(self, dev, oth_node,
3996
                                   oth_node==instance.primary_node):
3997
        <