
root / lib / cmdlib.py @ 60975797


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import tempfile
30
import re
31
import platform
32
import logging
33
import copy
34
import random
35

    
36
from ganeti import ssh
37
from ganeti import utils
38
from ganeti import errors
39
from ganeti import hypervisor
40
from ganeti import locking
41
from ganeti import constants
42
from ganeti import objects
43
from ganeti import opcodes
44
from ganeti import serializer
45
from ganeti import ssconf
46

    
47

    
48
class LogicalUnit(object):
49
  """Logical Unit base class.
50

51
  Subclasses must follow these rules:
52
    - implement ExpandNames
53
    - implement CheckPrereq
54
    - implement Exec
55
    - implement BuildHooksEnv
56
    - redefine HPATH and HTYPE
57
    - optionally redefine their run requirements:
58
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
59

60
  Note that all commands require root permissions.
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict(((i, 0) for i in locking.LEVELS))
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92

    
93
    for attr_name in self._OP_REQP:
94
      attr_val = getattr(op, attr_name, None)
95
      if attr_val is None:
96
        raise errors.OpPrereqError("Required parameter '%s' missing" %
97
                                   attr_name)
98
    self.CheckArguments()
99

    
100
  def __GetSSH(self):
101
    """Returns the SshRunner object
102

103
    """
104
    if not self.__ssh:
105
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
106
    return self.__ssh
107

    
108
  ssh = property(fget=__GetSSH)
109

    
110
  def CheckArguments(self):
111
    """Check syntactic validity for the opcode arguments.
112

113
    This method is for doing a simple syntactic check, ensuring the
114
    validity of opcode parameters, without any cluster-related
115
    checks. While the same can be accomplished in ExpandNames and/or
116
    CheckPrereq, doing it separately is better because:
117

118
      - ExpandNames is left as purely a lock-related function
119
      - CheckPrereq is run after we have acquired locks (and possibly
120
        waited for them)
121

122
    The function is allowed to change the self.op attribute so that
123
    later methods can no longer worry about missing parameters.
124

125
    """
126
    pass
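  # A minimal, hypothetical sketch of a subclass CheckArguments doing purely
  # syntactic validation (the attribute name "mode" and its allowed values are
  # invented for illustration and are not part of this module):
  #
  #   def CheckArguments(self):
  #     mode = getattr(self.op, "mode", None)
  #     if mode not in ("replace", "append"):
  #       raise errors.OpPrereqError("Invalid mode '%s'" % mode)
  #     self.op.mode = mode  # later methods can rely on the attribute existing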
127

    
128
  def ExpandNames(self):
129
    """Expand names for this LU.
130

131
    This method is called before starting to execute the opcode, and it should
132
    update all the parameters of the opcode to their canonical form (e.g. a
133
    short node name must be fully expanded after this method has successfully
134
    completed). This way locking, hooks, logging, etc. can work correctly.
135

136
    LUs which implement this method must also populate the self.needed_locks
137
    member, as a dict with lock levels as keys, and a list of needed lock names
138
    as values. Rules:
139

140
      - use an empty dict if you don't need any lock
141
      - if you don't need any lock at a particular level omit that level
142
      - don't put anything for the BGL level
143
      - if you want all locks at a level use locking.ALL_SET as a value
144

145
    If you need to share locks (rather than acquire them exclusively) at one
146
    level you can modify self.share_locks, setting a true value (usually 1) for
147
    that level. By default locks are not shared.
148

149
    Examples::
150

151
      # Acquire all nodes and one instance
152
      self.needed_locks = {
153
        locking.LEVEL_NODE: locking.ALL_SET,
154
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
155
      }
156
      # Acquire just two nodes
157
      self.needed_locks = {
158
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
159
      }
160
      # Acquire no locks
161
      self.needed_locks = {} # No, you can't leave it to the default value None
162

163
    """
164
    # The implementation of this method is mandatory only if the new LU is
165
    # concurrent, so that old LUs don't need to be changed all at the same
166
    # time.
167
    if self.REQ_BGL:
168
      self.needed_locks = {} # Exclusive LUs don't need locks.
169
    else:
170
      raise NotImplementedError
171

    
172
  def DeclareLocks(self, level):
173
    """Declare LU locking needs for a level
174

175
    While most LUs can just declare their locking needs at ExpandNames time,
176
    sometimes there's the need to calculate some locks after having acquired
177
    the ones before. This function is called just before acquiring locks at a
178
    particular level, but after acquiring the ones at lower levels, and permits
179
    such calculations. It can be used to modify self.needed_locks, and by
180
    default it does nothing.
181

182
    This function is only called if you have something already set in
183
    self.needed_locks for the level.
184

185
    @param level: Locking level which is going to be locked
186
    @type level: member of ganeti.locking.LEVELS
187

188
    """
189

    
190
  def CheckPrereq(self):
191
    """Check prerequisites for this LU.
192

193
    This method should check that the prerequisites for the execution
194
    of this LU are fulfilled. It can do internode communication, but
195
    it should be idempotent - no cluster or system changes are
196
    allowed.
197

198
    The method should raise errors.OpPrereqError in case something is
199
    not fulfilled. Its return value is ignored.
200

201
    This method should also update all the parameters of the opcode to
202
    their canonical form if it hasn't been done by ExpandNames before.
203

204
    """
205
    raise NotImplementedError
206

    
207
  def Exec(self, feedback_fn):
208
    """Execute the LU.
209

210
    This method should implement the actual work. It should raise
211
    errors.OpExecError for failures that are somewhat dealt with in
212
    code, or expected.
213

214
    """
215
    raise NotImplementedError
216

    
217
  def BuildHooksEnv(self):
218
    """Build hooks environment for this LU.
219

220
    This method should return a three-element tuple consisting of: a dict
221
    containing the environment that will be used for running the
222
    specific hook for this LU, a list of node names on which the hook
223
    should run before the execution, and a list of node names on which
224
    the hook should run after the execution.
225

226
    The keys of the dict must not be prefixed with 'GANETI_', as this will
227
    be handled in the hooks runner. Also note additional keys will be
228
    added by the hooks runner. If the LU doesn't define any
229
    environment, an empty dict (and not None) should be returned.
230

231
    No nodes should be returned as an empty list (and not None).
232

233
    Note that if the HPATH for a LU class is None, this function will
234
    not be called.
235

236
    """
237
    raise NotImplementedError
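  # Illustrative sketch only: the usual shape of a BuildHooksEnv result for a
  # hypothetical instance-level LU (the attribute names are assumptions made
  # for the example, not a prescription):
  #
  #   def BuildHooksEnv(self):
  #     env = {"OP_TARGET": self.op.instance_name}
  #     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
  #     return env, nl, nl  # (environment, pre-hook nodes, post-hook nodes)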
238

    
239
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
240
    """Notify the LU about the results of its hooks.
241

242
    This method is called every time a hooks phase is executed, and notifies
243
    the Logical Unit about the hooks' result. The LU can then use it to alter
244
    its result based on the hooks.  By default the method does nothing and the
245
    previous result is passed back unchanged, but any LU can override it if it
246
    wants to use the local cluster hook-scripts somehow.
247

248
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
249
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
250
    @param hook_results: the results of the multi-node hooks rpc call
251
    @param feedback_fn: function used to send feedback back to the caller
252
    @param lu_result: the previous Exec result this LU had, or None
253
        in the PRE phase
254
    @return: the new Exec result, based on the previous result
255
        and hook results
256

257
    """
258
    return lu_result
259

    
260
  def _ExpandAndLockInstance(self):
261
    """Helper function to expand and lock an instance.
262

263
    Many LUs that work on an instance take its name in self.op.instance_name
264
    and need to expand it and then declare the expanded name for locking. This
265
    function does it, and then updates self.op.instance_name to the expanded
266
    name. It also initializes needed_locks as a dict, if this hasn't been done
267
    before.
268

269
    """
270
    if self.needed_locks is None:
271
      self.needed_locks = {}
272
    else:
273
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
274
        "_ExpandAndLockInstance called with instance-level locks set"
275
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
276
    if expanded_name is None:
277
      raise errors.OpPrereqError("Instance '%s' not known" %
278
                                  self.op.instance_name)
279
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
280
    self.op.instance_name = expanded_name
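  # Usage sketch (an assumption about a typical caller, not a requirement):
  # an instance-level LU would normally call this helper from ExpandNames and
  # then ask for node lock recalculation in DeclareLocks, e.g.:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE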
281

    
282
  def _LockInstancesNodes(self, primary_only=False):
283
    """Helper function to declare instances' nodes for locking.
284

285
    This function should be called after locking one or more instances to lock
286
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
287
    with all primary or secondary nodes for instances already locked and
288
    present in self.needed_locks[locking.LEVEL_INSTANCE].
289

290
    It should be called from DeclareLocks, and for safety only works if
291
    self.recalculate_locks[locking.LEVEL_NODE] is set.
292

293
    In the future it may grow parameters to just lock some instance's nodes, or
294
    to just lock primaries or secondary nodes, if needed.
295

296
    It should be called in DeclareLocks in a way similar to::
297

298
      if level == locking.LEVEL_NODE:
299
        self._LockInstancesNodes()
300

301
    @type primary_only: boolean
302
    @param primary_only: only lock primary nodes of locked instances
303

304
    """
305
    assert locking.LEVEL_NODE in self.recalculate_locks, \
306
      "_LockInstancesNodes helper function called with no nodes to recalculate"
307

    
308
    # TODO: check if we really have been called with the instance locks held
309

    
310
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
311
    # future we might want to have different behaviors depending on the value
312
    # of self.recalculate_locks[locking.LEVEL_NODE]
313
    wanted_nodes = []
314
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
315
      instance = self.context.cfg.GetInstanceInfo(instance_name)
316
      wanted_nodes.append(instance.primary_node)
317
      if not primary_only:
318
        wanted_nodes.extend(instance.secondary_nodes)
319

    
320
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
321
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
322
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
323
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
324

    
325
    del self.recalculate_locks[locking.LEVEL_NODE]
326

    
327

    
328
class NoHooksLU(LogicalUnit):
329
  """Simple LU which runs no hooks.
330

331
  This LU is intended as a parent for other LogicalUnits which will
332
  run no hooks, in order to reduce duplicate code.
333

334
  """
335
  HPATH = None
336
  HTYPE = None
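# A minimal, hypothetical NoHooksLU subclass, shown only to illustrate the
# LogicalUnit contract documented above (ExpandNames/CheckPrereq/Exec); the
# class name and its behaviour are invented and not part of this module:
#
#   class LUExampleNoop(NoHooksLU):
#     _OP_REQP = []
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.needed_locks = {}
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do")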
337

    
338

    
339
def _GetWantedNodes(lu, nodes):
340
  """Returns list of checked and expanded node names.
341

342
  @type lu: L{LogicalUnit}
343
  @param lu: the logical unit on whose behalf we execute
344
  @type nodes: list
345
  @param nodes: list of node names or None for all nodes
346
  @rtype: list
347
  @return: the list of nodes, sorted
348
  @raise errors.OpProgrammerError: if the nodes parameter is wrong type
349

350
  """
351
  if not isinstance(nodes, list):
352
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
353

    
354
  if not nodes:
355
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
356
      " non-empty list of nodes whose name is to be expanded.")
357

    
358
  wanted = []
359
  for name in nodes:
360
    node = lu.cfg.ExpandNodeName(name)
361
    if node is None:
362
      raise errors.OpPrereqError("No such node name '%s'" % name)
363
    wanted.append(node)
364

    
365
  return utils.NiceSort(wanted)
366

    
367

    
368
def _GetWantedInstances(lu, instances):
369
  """Returns list of checked and expanded instance names.
370

371
  @type lu: L{LogicalUnit}
372
  @param lu: the logical unit on whose behalf we execute
373
  @type instances: list
374
  @param instances: list of instance names or None for all instances
375
  @rtype: list
376
  @return: the list of instances, sorted
377
  @raise errors.OpPrereqError: if the instances parameter is wrong type
378
  @raise errors.OpPrereqError: if any of the passed instances is not found
379

380
  """
381
  if not isinstance(instances, list):
382
    raise errors.OpPrereqError("Invalid argument type 'instances'")
383

    
384
  if instances:
385
    wanted = []
386

    
387
    for name in instances:
388
      instance = lu.cfg.ExpandInstanceName(name)
389
      if instance is None:
390
        raise errors.OpPrereqError("No such instance name '%s'" % name)
391
      wanted.append(instance)
392

    
393
  else:
394
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
395
  return wanted
396

    
397

    
398
def _CheckOutputFields(static, dynamic, selected):
399
  """Checks whether all selected fields are valid.
400

401
  @type static: L{utils.FieldSet}
402
  @param static: static fields set
403
  @type dynamic: L{utils.FieldSet}
404
  @param dynamic: dynamic fields set
405

406
  """
407
  f = utils.FieldSet()
408
  f.Extend(static)
409
  f.Extend(dynamic)
410

    
411
  delta = f.NonMatching(selected)
412
  if delta:
413
    raise errors.OpPrereqError("Unknown output fields selected: %s"
414
                               % ",".join(delta))
415

    
416

    
417
def _CheckBooleanOpField(op, name):
418
  """Validates boolean opcode parameters.
419

420
  This will ensure that an opcode parameter is either a boolean value,
421
  or None (but that it always exists).
422

423
  """
424
  val = getattr(op, name, None)
425
  if not (val is None or isinstance(val, bool)):
426
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
427
                               (name, str(val)))
428
  setattr(op, name, val)
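# Usage sketch (the attribute name "force" is only an example): LUs normally
# call this from CheckArguments, e.g. _CheckBooleanOpField(self.op, "force"),
# after which self.op.force is guaranteed to exist as a boolean or None.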
429

    
430

    
431
def _CheckNodeOnline(lu, node):
432
  """Ensure that a given node is online.
433

434
  @param lu: the LU on behalf of which we make the check
435
  @param node: the node to check
436
  @raise errors.OpPrereqError: if the node is offline
437

438
  """
439
  if lu.cfg.GetNodeInfo(node).offline:
440
    raise errors.OpPrereqError("Can't use offline node %s" % node)
441

    
442

    
443
def _CheckNodeNotDrained(lu, node):
444
  """Ensure that a given node is not drained.
445

446
  @param lu: the LU on behalf of which we make the check
447
  @param node: the node to check
448
  @raise errors.OpPrereqError: if the node is drained
449

450
  """
451
  if lu.cfg.GetNodeInfo(node).drained:
452
    raise errors.OpPrereqError("Can't use drained node %s" % node)
453

    
454

    
455
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
456
                          memory, vcpus, nics, disk_template, disks,
457
                          bep, hvp, hypervisor):
458
  """Builds instance related env variables for hooks
459

460
  This builds the hook environment from individual variables.
461

462
  @type name: string
463
  @param name: the name of the instance
464
  @type primary_node: string
465
  @param primary_node: the name of the instance's primary node
466
  @type secondary_nodes: list
467
  @param secondary_nodes: list of secondary nodes as strings
468
  @type os_type: string
469
  @param os_type: the name of the instance's OS
470
  @type status: boolean
471
  @param status: the should_run status of the instance
472
  @type memory: string
473
  @param memory: the memory size of the instance
474
  @type vcpus: string
475
  @param vcpus: the count of VCPUs the instance has
476
  @type nics: list
477
  @param nics: list of tuples (ip, bridge, mac) representing
478
      the NICs the instance has
479
  @type disk_template: string
480
  @param disk_template: the disk template of the instance
481
  @type disks: list
482
  @param disks: the list of (size, mode) pairs
483
  @type bep: dict
484
  @param bep: the backend parameters for the instance
485
  @type hvp: dict
486
  @param hvp: the hypervisor parameters for the instance
487
  @type hypervisor: string
488
  @param hypervisor: the hypervisor for the instance
489
  @rtype: dict
490
  @return: the hook environment for this instance
491

492
  """
493
  if status:
494
    str_status = "up"
495
  else:
496
    str_status = "down"
497
  env = {
498
    "OP_TARGET": name,
499
    "INSTANCE_NAME": name,
500
    "INSTANCE_PRIMARY": primary_node,
501
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
502
    "INSTANCE_OS_TYPE": os_type,
503
    "INSTANCE_STATUS": str_status,
504
    "INSTANCE_MEMORY": memory,
505
    "INSTANCE_VCPUS": vcpus,
506
    "INSTANCE_DISK_TEMPLATE": disk_template,
507
    "INSTANCE_HYPERVISOR": hypervisor,
508
  }
509

    
510
  if nics:
511
    nic_count = len(nics)
512
    for idx, (ip, bridge, mac) in enumerate(nics):
513
      if ip is None:
514
        ip = ""
515
      env["INSTANCE_NIC%d_IP" % idx] = ip
516
      env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
517
      env["INSTANCE_NIC%d_MAC" % idx] = mac
518
  else:
519
    nic_count = 0
520

    
521
  env["INSTANCE_NIC_COUNT"] = nic_count
522

    
523
  if disks:
524
    disk_count = len(disks)
525
    for idx, (size, mode) in enumerate(disks):
526
      env["INSTANCE_DISK%d_SIZE" % idx] = size
527
      env["INSTANCE_DISK%d_MODE" % idx] = mode
528
  else:
529
    disk_count = 0
530

    
531
  env["INSTANCE_DISK_COUNT"] = disk_count
532

    
533
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
534
    for key, value in source.items():
535
      env["INSTANCE_%s_%s" % (kind, key)] = value
536

    
537
  return env
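# Example of the environment produced (all values are hypothetical and only
# meant to make the key naming scheme concrete):
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_SECONDARIES": "node2.example.com",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_IP": "",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     "INSTANCE_DISK0_MODE": "rw",
#     "INSTANCE_BE_memory": 512,
#     ...
#   }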
538

    
539

    
540
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
541
  """Builds instance related env variables for hooks from an object.
542

543
  @type lu: L{LogicalUnit}
544
  @param lu: the logical unit on whose behalf we execute
545
  @type instance: L{objects.Instance}
546
  @param instance: the instance for which we should build the
547
      environment
548
  @type override: dict
549
  @param override: dictionary with key/values that will override
550
      our values
551
  @rtype: dict
552
  @return: the hook environment dictionary
553

554
  """
555
  cluster = lu.cfg.GetClusterInfo()
556
  bep = cluster.FillBE(instance)
557
  hvp = cluster.FillHV(instance)
558
  args = {
559
    'name': instance.name,
560
    'primary_node': instance.primary_node,
561
    'secondary_nodes': instance.secondary_nodes,
562
    'os_type': instance.os,
563
    'status': instance.admin_up,
564
    'memory': bep[constants.BE_MEMORY],
565
    'vcpus': bep[constants.BE_VCPUS],
566
    'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
567
    'disk_template': instance.disk_template,
568
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
569
    'bep': bep,
570
    'hvp': hvp,
571
    'hypervisor': instance.hypervisor,
572
  }
573
  if override:
574
    args.update(override)
575
  return _BuildInstanceHookEnv(**args)
576

    
577

    
578
def _AdjustCandidatePool(lu):
579
  """Adjust the candidate pool after node operations.
580

581
  """
582
  mod_list = lu.cfg.MaintainCandidatePool()
583
  if mod_list:
584
    lu.LogInfo("Promoted nodes to master candidate role: %s",
585
               ", ".join(node.name for node in mod_list))
586
    for name in mod_list:
587
      lu.context.ReaddNode(name)
588
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
589
  if mc_now > mc_max:
590
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
591
               (mc_now, mc_max))
592

    
593

    
594
def _CheckInstanceBridgesExist(lu, instance):
595
  """Check that the brigdes needed by an instance exist.
596

597
  """
598
  # check bridges existence
599
  brlist = [nic.bridge for nic in instance.nics]
600
  result = lu.rpc.call_bridges_exist(instance.primary_node, brlist)
601
  result.Raise()
602
  if not result.data:
603
    raise errors.OpPrereqError("One or more target bridges %s does not"
604
                               " exist on destination node '%s'" %
605
                               (brlist, instance.primary_node))
606

    
607

    
608
class LUDestroyCluster(NoHooksLU):
609
  """Logical unit for destroying the cluster.
610

611
  """
612
  _OP_REQP = []
613

    
614
  def CheckPrereq(self):
615
    """Check prerequisites.
616

617
    This checks whether the cluster is empty.
618

619
    Any errors are signalled by raising errors.OpPrereqError.
620

621
    """
622
    master = self.cfg.GetMasterNode()
623

    
624
    nodelist = self.cfg.GetNodeList()
625
    if len(nodelist) != 1 or nodelist[0] != master:
626
      raise errors.OpPrereqError("There are still %d node(s) in"
627
                                 " this cluster." % (len(nodelist) - 1))
628
    instancelist = self.cfg.GetInstanceList()
629
    if instancelist:
630
      raise errors.OpPrereqError("There are still %d instance(s) in"
631
                                 " this cluster." % len(instancelist))
632

    
633
  def Exec(self, feedback_fn):
634
    """Destroys the cluster.
635

636
    """
637
    master = self.cfg.GetMasterNode()
638
    result = self.rpc.call_node_stop_master(master, False)
639
    result.Raise()
640
    if not result.data:
641
      raise errors.OpExecError("Could not disable the master role")
642
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
643
    utils.CreateBackup(priv_key)
644
    utils.CreateBackup(pub_key)
645
    return master
646

    
647

    
648
class LUVerifyCluster(LogicalUnit):
649
  """Verifies the cluster status.
650

651
  """
652
  HPATH = "cluster-verify"
653
  HTYPE = constants.HTYPE_CLUSTER
654
  _OP_REQP = ["skip_checks"]
655
  REQ_BGL = False
656

    
657
  def ExpandNames(self):
658
    self.needed_locks = {
659
      locking.LEVEL_NODE: locking.ALL_SET,
660
      locking.LEVEL_INSTANCE: locking.ALL_SET,
661
    }
662
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
663

    
664
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
665
                  node_result, feedback_fn, master_files,
666
                  drbd_map, vg_name):
667
    """Run multiple tests against a node.
668

669
    Test list:
670

671
      - compares ganeti version
672
      - checks vg existence and size > 20G
673
      - checks config file checksum
674
      - checks ssh to other nodes
675

676
    @type nodeinfo: L{objects.Node}
677
    @param nodeinfo: the node to check
678
    @param file_list: required list of files
679
    @param local_cksum: dictionary of local files and their checksums
680
    @param node_result: the results from the node
681
    @param feedback_fn: function used to accumulate results
682
    @param master_files: list of files that only masters should have
683
    @param drbd_map: the used DRBD minors for this node, in the
684
        form of minor: (instance, must_exist), which correspond to instances
685
        and their running status
686
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
687

688
    """
689
    node = nodeinfo.name
690

    
691
    # main result, node_result should be a non-empty dict
692
    if not node_result or not isinstance(node_result, dict):
693
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
694
      return True
695

    
696
    # compares ganeti version
697
    local_version = constants.PROTOCOL_VERSION
698
    remote_version = node_result.get('version', None)
699
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
700
            len(remote_version) == 2):
701
      feedback_fn("  - ERROR: connection to %s failed" % (node))
702
      return True
703

    
704
    if local_version != remote_version[0]:
705
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
706
                  " node %s %s" % (local_version, node, remote_version[0]))
707
      return True
708

    
709
    # node seems compatible, we can actually try to look into its results
710

    
711
    bad = False
712

    
713
    # full package version
714
    if constants.RELEASE_VERSION != remote_version[1]:
715
      feedback_fn("  - WARNING: software version mismatch: master %s,"
716
                  " node %s %s" %
717
                  (constants.RELEASE_VERSION, node, remote_version[1]))
718

    
719
    # checks vg existence and size > 20G
720
    if vg_name is not None:
721
      vglist = node_result.get(constants.NV_VGLIST, None)
722
      if not vglist:
723
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
724
                        (node,))
725
        bad = True
726
      else:
727
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
728
                                              constants.MIN_VG_SIZE)
729
        if vgstatus:
730
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
731
          bad = True
732

    
733
    # checks config file checksum
734

    
735
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
736
    if not isinstance(remote_cksum, dict):
737
      bad = True
738
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
739
    else:
740
      for file_name in file_list:
741
        node_is_mc = nodeinfo.master_candidate
742
        must_have_file = file_name not in master_files
743
        if file_name not in remote_cksum:
744
          if node_is_mc or must_have_file:
745
            bad = True
746
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
747
        elif remote_cksum[file_name] != local_cksum[file_name]:
748
          if node_is_mc or must_have_file:
749
            bad = True
750
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
751
          else:
752
            # not candidate and this is not a must-have file
753
            bad = True
754
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
755
                        " candidates (and the file is outdated)" % file_name)
756
        else:
757
          # all good, except non-master/non-must have combination
758
          if not node_is_mc and not must_have_file:
759
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
760
                        " candidates" % file_name)
761

    
762
    # checks ssh to any
763

    
764
    if constants.NV_NODELIST not in node_result:
765
      bad = True
766
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
767
    else:
768
      if node_result[constants.NV_NODELIST]:
769
        bad = True
770
        for node in node_result[constants.NV_NODELIST]:
771
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
772
                          (node, node_result[constants.NV_NODELIST][node]))
773

    
774
    if constants.NV_NODENETTEST not in node_result:
775
      bad = True
776
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
777
    else:
778
      if node_result[constants.NV_NODENETTEST]:
779
        bad = True
780
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
781
        for node in nlist:
782
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
783
                          (node, node_result[constants.NV_NODENETTEST][node]))
784

    
785
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
786
    if isinstance(hyp_result, dict):
787
      for hv_name, hv_result in hyp_result.iteritems():
788
        if hv_result is not None:
789
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
790
                      (hv_name, hv_result))
791

    
792
    # check used drbd list
793
    if vg_name is not None:
794
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
795
      if not isinstance(used_minors, (tuple, list)):
796
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
797
                    str(used_minors))
798
      else:
799
        for minor, (iname, must_exist) in drbd_map.items():
800
          if minor not in used_minors and must_exist:
801
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
802
                        " not active" % (minor, iname))
803
            bad = True
804
        for minor in used_minors:
805
          if minor not in drbd_map:
806
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
807
                        minor)
808
            bad = True
809

    
810
    return bad
811

    
812
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
813
                      node_instance, feedback_fn, n_offline):
814
    """Verify an instance.
815

816
    This function checks to see if the required block devices are
817
    available on the instance's node.
818

819
    """
820
    bad = False
821

    
822
    node_current = instanceconfig.primary_node
823

    
824
    node_vol_should = {}
825
    instanceconfig.MapLVsByNode(node_vol_should)
826

    
827
    for node in node_vol_should:
828
      if node in n_offline:
829
        # ignore missing volumes on offline nodes
830
        continue
831
      for volume in node_vol_should[node]:
832
        if node not in node_vol_is or volume not in node_vol_is[node]:
833
          feedback_fn("  - ERROR: volume %s missing on node %s" %
834
                          (volume, node))
835
          bad = True
836

    
837
    if instanceconfig.admin_up:
838
      if ((node_current not in node_instance or
839
          not instance in node_instance[node_current]) and
840
          node_current not in n_offline):
841
        feedback_fn("  - ERROR: instance %s not running on node %s" %
842
                        (instance, node_current))
843
        bad = True
844

    
845
    for node in node_instance:
846
      if (not node == node_current):
847
        if instance in node_instance[node]:
848
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
849
                          (instance, node))
850
          bad = True
851

    
852
    return bad
853

    
854
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
855
    """Verify if there are any unknown volumes in the cluster.
856

857
    The .os, .swap and backup volumes are ignored. All other volumes are
858
    reported as unknown.
859

860
    """
861
    bad = False
862

    
863
    for node in node_vol_is:
864
      for volume in node_vol_is[node]:
865
        if node not in node_vol_should or volume not in node_vol_should[node]:
866
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
867
                      (volume, node))
868
          bad = True
869
    return bad
870

    
871
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
872
    """Verify the list of running instances.
873

874
    This checks what instances are running but unknown to the cluster.
875

876
    """
877
    bad = False
878
    for node in node_instance:
879
      for runninginstance in node_instance[node]:
880
        if runninginstance not in instancelist:
881
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
882
                          (runninginstance, node))
883
          bad = True
884
    return bad
885

    
886
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
887
    """Verify N+1 Memory Resilience.
888

889
    Check that if one single node dies we can still start all the instances it
890
    was primary for.
891

892
    """
893
    bad = False
894

    
895
    for node, nodeinfo in node_info.iteritems():
896
      # This code checks that every node which is now listed as secondary has
897
      # enough memory to host all instances it is supposed to, should a single
898
      # other node in the cluster fail.
899
      # FIXME: not ready for failover to an arbitrary node
900
      # FIXME: does not support file-backed instances
901
      # WARNING: we currently take into account down instances as well as up
902
      # ones, considering that even if they're down someone might want to start
903
      # them even in the event of a node failure.
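      # Worked example (numbers are hypothetical): if this node is secondary
      # for two auto-balanced instances whose primary is node "alpha", with
      # BE_MEMORY of 2048 and 1024 MiB, then needed_mem for prinode "alpha"
      # is 3072; the check below fails if this node's 'mfree' is lower.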
904
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
905
        needed_mem = 0
906
        for instance in instances:
907
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
908
          if bep[constants.BE_AUTO_BALANCE]:
909
            needed_mem += bep[constants.BE_MEMORY]
910
        if nodeinfo['mfree'] < needed_mem:
911
          feedback_fn("  - ERROR: not enough memory on node %s to accomodate"
912
                      " failovers should node %s fail" % (node, prinode))
913
          bad = True
914
    return bad
915

    
916
  def CheckPrereq(self):
917
    """Check prerequisites.
918

919
    Transform the list of checks we're going to skip into a set and check that
920
    all its members are valid.
921

922
    """
923
    self.skip_set = frozenset(self.op.skip_checks)
924
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
925
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
926

    
927
  def BuildHooksEnv(self):
928
    """Build hooks env.
929

930
    Cluster-Verify hooks are run only in the post phase; their failure makes
931
    their output be logged in the verify output and the verification fail.
932

933
    """
934
    all_nodes = self.cfg.GetNodeList()
935
    env = {
936
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
937
      }
938
    for node in self.cfg.GetAllNodesInfo().values():
939
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
940

    
941
    return env, [], all_nodes
942

    
943
  def Exec(self, feedback_fn):
944
    """Verify integrity of cluster, performing various test on nodes.
945

946
    """
947
    bad = False
948
    feedback_fn("* Verifying global settings")
949
    for msg in self.cfg.VerifyConfig():
950
      feedback_fn("  - ERROR: %s" % msg)
951

    
952
    vg_name = self.cfg.GetVGName()
953
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
954
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
955
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
956
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
957
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
958
                        for iname in instancelist)
959
    i_non_redundant = [] # Non redundant instances
960
    i_non_a_balanced = [] # Non auto-balanced instances
961
    n_offline = [] # List of offline nodes
962
    n_drained = [] # List of nodes being drained
963
    node_volume = {}
964
    node_instance = {}
965
    node_info = {}
966
    instance_cfg = {}
967

    
968
    # FIXME: verify OS list
969
    # do local checksums
970
    master_files = [constants.CLUSTER_CONF_FILE]
971

    
972
    file_names = ssconf.SimpleStore().GetFileList()
973
    file_names.append(constants.SSL_CERT_FILE)
974
    file_names.append(constants.RAPI_CERT_FILE)
975
    file_names.extend(master_files)
976

    
977
    local_checksums = utils.FingerprintFiles(file_names)
978

    
979
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
980
    node_verify_param = {
981
      constants.NV_FILELIST: file_names,
982
      constants.NV_NODELIST: [node.name for node in nodeinfo
983
                              if not node.offline],
984
      constants.NV_HYPERVISOR: hypervisors,
985
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
986
                                  node.secondary_ip) for node in nodeinfo
987
                                 if not node.offline],
988
      constants.NV_INSTANCELIST: hypervisors,
989
      constants.NV_VERSION: None,
990
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
991
      }
992
    if vg_name is not None:
993
      node_verify_param[constants.NV_VGLIST] = None
994
      node_verify_param[constants.NV_LVLIST] = vg_name
995
      node_verify_param[constants.NV_DRBDLIST] = None
996
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
997
                                           self.cfg.GetClusterName())
998

    
999
    cluster = self.cfg.GetClusterInfo()
1000
    master_node = self.cfg.GetMasterNode()
1001
    all_drbd_map = self.cfg.ComputeDRBDMap()
1002

    
1003
    for node_i in nodeinfo:
1004
      node = node_i.name
1005
      nresult = all_nvinfo[node].data
1006

    
1007
      if node_i.offline:
1008
        feedback_fn("* Skipping offline node %s" % (node,))
1009
        n_offline.append(node)
1010
        continue
1011

    
1012
      if node == master_node:
1013
        ntype = "master"
1014
      elif node_i.master_candidate:
1015
        ntype = "master candidate"
1016
      elif node_i.drained:
1017
        ntype = "drained"
1018
        n_drained.append(node)
1019
      else:
1020
        ntype = "regular"
1021
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1022

    
1023
      if all_nvinfo[node].failed or not isinstance(nresult, dict):
1024
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
1025
        bad = True
1026
        continue
1027

    
1028
      node_drbd = {}
1029
      for minor, instance in all_drbd_map[node].items():
1030
        if instance not in instanceinfo:
1031
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1032
                      instance)
1033
          # ghost instance should not be running, but otherwise we
1034
          # don't give double warnings (both ghost instance and
1035
          # unallocated minor in use)
1036
          node_drbd[minor] = (instance, False)
1037
        else:
1038
          instance = instanceinfo[instance]
1039
          node_drbd[minor] = (instance.name, instance.admin_up)
1040
      result = self._VerifyNode(node_i, file_names, local_checksums,
1041
                                nresult, feedback_fn, master_files,
1042
                                node_drbd, vg_name)
1043
      bad = bad or result
1044

    
1045
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1046
      if vg_name is None:
1047
        node_volume[node] = {}
1048
      elif isinstance(lvdata, basestring):
1049
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1050
                    (node, utils.SafeEncode(lvdata)))
1051
        bad = True
1052
        node_volume[node] = {}
1053
      elif not isinstance(lvdata, dict):
1054
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1055
        bad = True
1056
        continue
1057
      else:
1058
        node_volume[node] = lvdata
1059

    
1060
      # node_instance
1061
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1062
      if not isinstance(idata, list):
1063
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1064
                    (node,))
1065
        bad = True
1066
        continue
1067

    
1068
      node_instance[node] = idata
1069

    
1070
      # node_info
1071
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1072
      if not isinstance(nodeinfo, dict):
1073
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1074
        bad = True
1075
        continue
1076

    
1077
      try:
1078
        node_info[node] = {
1079
          "mfree": int(nodeinfo['memory_free']),
1080
          "pinst": [],
1081
          "sinst": [],
1082
          # dictionary holding all instances this node is secondary for,
1083
          # grouped by their primary node. Each key is a cluster node, and each
1084
          # value is a list of instances which have the key as primary and the
1085
          # current node as secondary.  This is handy to calculate N+1 memory
1086
          # availability if you can only failover from a primary to its
1087
          # secondary.
1088
          "sinst-by-pnode": {},
1089
        }
1090
        # FIXME: devise a free space model for file based instances as well
1091
        if vg_name is not None:
1092
          if (constants.NV_VGLIST not in nresult or
1093
              vg_name not in nresult[constants.NV_VGLIST]):
1094
            feedback_fn("  - ERROR: node %s didn't return data for the"
1095
                        " volume group '%s' - it is either missing or broken" %
1096
                        (node, vg_name))
1097
            bad = True
1098
            continue
1099
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1100
      except (ValueError, KeyError):
1101
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1102
                    " from node %s" % (node,))
1103
        bad = True
1104
        continue
1105

    
1106
    node_vol_should = {}
1107

    
1108
    for instance in instancelist:
1109
      feedback_fn("* Verifying instance %s" % instance)
1110
      inst_config = instanceinfo[instance]
1111
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1112
                                     node_instance, feedback_fn, n_offline)
1113
      bad = bad or result
1114
      inst_nodes_offline = []
1115

    
1116
      inst_config.MapLVsByNode(node_vol_should)
1117

    
1118
      instance_cfg[instance] = inst_config
1119

    
1120
      pnode = inst_config.primary_node
1121
      if pnode in node_info:
1122
        node_info[pnode]['pinst'].append(instance)
1123
      elif pnode not in n_offline:
1124
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1125
                    " %s failed" % (instance, pnode))
1126
        bad = True
1127

    
1128
      if pnode in n_offline:
1129
        inst_nodes_offline.append(pnode)
1130

    
1131
      # If the instance is non-redundant we cannot survive losing its primary
1132
      # node, so we are not N+1 compliant. On the other hand we have no disk
1133
      # templates with more than one secondary so that situation is not well
1134
      # supported either.
1135
      # FIXME: does not support file-backed instances
1136
      if len(inst_config.secondary_nodes) == 0:
1137
        i_non_redundant.append(instance)
1138
      elif len(inst_config.secondary_nodes) > 1:
1139
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1140
                    % instance)
1141

    
1142
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1143
        i_non_a_balanced.append(instance)
1144

    
1145
      for snode in inst_config.secondary_nodes:
1146
        if snode in node_info:
1147
          node_info[snode]['sinst'].append(instance)
1148
          if pnode not in node_info[snode]['sinst-by-pnode']:
1149
            node_info[snode]['sinst-by-pnode'][pnode] = []
1150
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1151
        elif snode not in n_offline:
1152
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1153
                      " %s failed" % (instance, snode))
1154
          bad = True
1155
        if snode in n_offline:
1156
          inst_nodes_offline.append(snode)
1157

    
1158
      if inst_nodes_offline:
1159
        # warn that the instance lives on offline nodes, and set bad=True
1160
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1161
                    ", ".join(inst_nodes_offline))
1162
        bad = True
1163

    
1164
    feedback_fn("* Verifying orphan volumes")
1165
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1166
                                       feedback_fn)
1167
    bad = bad or result
1168

    
1169
    feedback_fn("* Verifying remaining instances")
1170
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1171
                                         feedback_fn)
1172
    bad = bad or result
1173

    
1174
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1175
      feedback_fn("* Verifying N+1 Memory redundancy")
1176
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1177
      bad = bad or result
1178

    
1179
    feedback_fn("* Other Notes")
1180
    if i_non_redundant:
1181
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1182
                  % len(i_non_redundant))
1183

    
1184
    if i_non_a_balanced:
1185
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1186
                  % len(i_non_a_balanced))
1187

    
1188
    if n_offline:
1189
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1190

    
1191
    if n_drained:
1192
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1193

    
1194
    return not bad
1195

    
1196
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1197
    """Analize the post-hooks' result
1198

1199
    This method analyses the hook result, handles it, and sends some
1200
    nicely-formatted feedback back to the user.
1201

1202
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1203
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1204
    @param hooks_results: the results of the multi-node hooks rpc call
1205
    @param feedback_fn: function used to send feedback back to the caller
1206
    @param lu_result: previous Exec result
1207
    @return: the new Exec result, based on the previous result
1208
        and hook results
1209

1210
    """
1211
    # We only really run POST phase hooks, and are only interested in
1212
    # their results
1213
    if phase == constants.HOOKS_PHASE_POST:
1214
      # Used to change hooks' output to proper indentation
1215
      indent_re = re.compile('^', re.M)
1216
      feedback_fn("* Hooks Results")
1217
      if not hooks_results:
1218
        feedback_fn("  - ERROR: general communication failure")
1219
        lu_result = 1
1220
      else:
1221
        for node_name in hooks_results:
1222
          show_node_header = True
1223
          res = hooks_results[node_name]
1224
          if res.failed or res.data is False or not isinstance(res.data, list):
1225
            if res.offline:
1226
              # no need to warn or set fail return value
1227
              continue
1228
            feedback_fn("    Communication failure in hooks execution")
1229
            lu_result = 1
1230
            continue
1231
          for script, hkr, output in res.data:
1232
            if hkr == constants.HKR_FAIL:
1233
              # The node header is only shown once, if there are
1234
              # failing hooks on that node
1235
              if show_node_header:
1236
                feedback_fn("  Node %s:" % node_name)
1237
                show_node_header = False
1238
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1239
              output = indent_re.sub('      ', output)
1240
              feedback_fn("%s" % output)
1241
              lu_result = 1
1242

    
1243
      return lu_result
1244

    
1245

    
1246
class LUVerifyDisks(NoHooksLU):
1247
  """Verifies the cluster disks status.
1248

1249
  """
1250
  _OP_REQP = []
1251
  REQ_BGL = False
1252

    
1253
  def ExpandNames(self):
1254
    self.needed_locks = {
1255
      locking.LEVEL_NODE: locking.ALL_SET,
1256
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1257
    }
1258
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1259

    
1260
  def CheckPrereq(self):
1261
    """Check prerequisites.
1262

1263
    This has no prerequisites.
1264

1265
    """
1266
    pass
1267

    
1268
  def Exec(self, feedback_fn):
1269
    """Verify integrity of cluster disks.
1270

1271
    """
1272
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}
1273

    
1274
    vg_name = self.cfg.GetVGName()
1275
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1276
    instances = [self.cfg.GetInstanceInfo(name)
1277
                 for name in self.cfg.GetInstanceList()]
1278

    
1279
    nv_dict = {}
1280
    for inst in instances:
1281
      inst_lvs = {}
1282
      if (not inst.admin_up or
1283
          inst.disk_template not in constants.DTS_NET_MIRROR):
1284
        continue
1285
      inst.MapLVsByNode(inst_lvs)
1286
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1287
      for node, vol_list in inst_lvs.iteritems():
1288
        for vol in vol_list:
1289
          nv_dict[(node, vol)] = inst
1290

    
1291
    if not nv_dict:
1292
      return result
1293

    
1294
    node_lvs = self.rpc.call_volume_list(nodes, vg_name)
1295

    
1296
    to_act = set()
1297
    for node in nodes:
1298
      # node_volume
1299
      lvs = node_lvs[node]
1300
      if lvs.failed:
1301
        if not lvs.offline:
1302
          self.LogWarning("Connection to node %s failed: %s" %
1303
                          (node, lvs.data))
1304
        continue
1305
      lvs = lvs.data
1306
      if isinstance(lvs, basestring):
1307
        logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
1308
        res_nlvm[node] = lvs
1309
        continue
1310
      elif not isinstance(lvs, dict):
1311
        logging.warning("Connection to node %s failed or invalid data"
1312
                        " returned", node)
1313
        res_nodes.append(node)
1314
        continue
1315

    
1316
      for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
1317
        inst = nv_dict.pop((node, lv_name), None)
1318
        if (not lv_online and inst is not None
1319
            and inst.name not in res_instances):
1320
          res_instances.append(inst.name)
1321

    
1322
    # any leftover items in nv_dict are missing LVs, let's arrange the
1323
    # data better
1324
    for key, inst in nv_dict.iteritems():
1325
      if inst.name not in res_missing:
1326
        res_missing[inst.name] = []
1327
      res_missing[inst.name].append(key)
1328

    
1329
    return result
1330

    
1331

    
1332
class LURepairDiskSizes(NoHooksLU):
1333
  """Verifies the cluster disks sizes.
1334

1335
  """
1336
  _OP_REQP = ["instances"]
1337
  REQ_BGL = False
1338

    
1339
  def ExpandNames(self):
1340

    
1341
    if not isinstance(self.op.instances, list):
1342
      raise errors.OpPrereqError("Invalid argument type 'instances'")
1343

    
1344
    if self.op.instances:
1345
      self.wanted_names = []
1346
      for name in self.op.instances:
1347
        full_name = self.cfg.ExpandInstanceName(name)
1348
        if full_name is None:
1349
          raise errors.OpPrereqError("Instance '%s' not known" % name)
1350
        self.wanted_names.append(full_name)
1352
      self.needed_locks = {
1353
        locking.LEVEL_NODE: [],
1354
        locking.LEVEL_INSTANCE: self.wanted_names,
1355
        }
1356
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1357
    else:
1358
      self.wanted_names = None
1359
      self.needed_locks = {
1360
        locking.LEVEL_NODE: locking.ALL_SET,
1361
        locking.LEVEL_INSTANCE: locking.ALL_SET,
1362
        }
1363
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1364

    
1365
  def DeclareLocks(self, level):
1366
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1367
      self._LockInstancesNodes(primary_only=True)
1368

    
1369
  def CheckPrereq(self):
1370
    """Check prerequisites.
1371

1372
    This only checks the optional instance list against the existing names.
1373

1374
    """
1375
    if self.wanted_names is None:
1376
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1377

    
1378
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1379
                             in self.wanted_names]
1380

    
1381
  def Exec(self, feedback_fn):
1382
    """Verify the size of cluster disks.
1383

1384
    """
1385
    # TODO: check child disks too
1386
    # TODO: check differences in size between primary/secondary nodes
1387
    per_node_disks = {}
1388
    for instance in self.wanted_instances:
1389
      pnode = instance.primary_node
1390
      if pnode not in per_node_disks:
1391
        per_node_disks[pnode] = []
1392
      for idx, disk in enumerate(instance.disks):
1393
        per_node_disks[pnode].append((instance, idx, disk))
1394

    
1395
    changed = []
1396
    for node, dskl in per_node_disks.items():
1397
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
1398
      if result.failed:
1399
        self.LogWarning("Failure in blockdev_getsizes call to node"
1400
                        " %s, ignoring", node)
1401
        continue
1402
      if len(result.data) != len(dskl):
1403
        self.LogWarning("Invalid result from node %s, ignoring node results",
1404
                        node)
1405
        continue
1406
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1407
        if size is None:
1408
          self.LogWarning("Disk %d of instance %s did not return size"
1409
                          " information, ignoring", idx, instance.name)
1410
          continue
1411
        if not isinstance(size, (int, long)):
1412
          self.LogWarning("Disk %d of instance %s did not return valid"
1413
                          " size information, ignoring", idx, instance.name)
1414
          continue
1415
        size = size >> 20
1416
        if size != disk.size:
1417
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1418
                       " correcting: recorded %d, actual %d", idx,
1419
                       instance.name, disk.size, size)
1420
          disk.size = size
1421
          self.cfg.Update(instance)
1422
          changed.append((instance.name, idx, size))
1423
    return changed
1424

    
1425

    
1426
class LURenameCluster(LogicalUnit):
1427
  """Rename the cluster.
1428

1429
  """
1430
  HPATH = "cluster-rename"
1431
  HTYPE = constants.HTYPE_CLUSTER
1432
  _OP_REQP = ["name"]
1433

    
1434
  def BuildHooksEnv(self):
1435
    """Build hooks env.
1436

1437
    """
1438
    env = {
1439
      "OP_TARGET": self.cfg.GetClusterName(),
1440
      "NEW_NAME": self.op.name,
1441
      }
1442
    mn = self.cfg.GetMasterNode()
1443
    return env, [mn], [mn]
1444

    
1445
  def CheckPrereq(self):
1446
    """Verify that the passed name is a valid one.
1447

1448
    """
1449
    hostname = utils.HostInfo(self.op.name)
1450

    
1451
    new_name = hostname.name
1452
    self.ip = new_ip = hostname.ip
1453
    old_name = self.cfg.GetClusterName()
1454
    old_ip = self.cfg.GetMasterIP()
1455
    if new_name == old_name and new_ip == old_ip:
1456
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1457
                                 " cluster has changed")
1458
    if new_ip != old_ip:
1459
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1460
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1461
                                   " reachable on the network. Aborting." %
1462
                                   new_ip)
1463

    
1464
    self.op.name = new_name
1465

    
1466
  def Exec(self, feedback_fn):
1467
    """Rename the cluster.
1468

1469
    """
1470
    clustername = self.op.name
1471
    ip = self.ip
1472

    
1473
    # shutdown the master IP
1474
    master = self.cfg.GetMasterNode()
1475
    result = self.rpc.call_node_stop_master(master, False)
1476
    if result.failed or not result.data:
1477
      raise errors.OpExecError("Could not disable the master role")
1478

    
1479
    try:
1480
      cluster = self.cfg.GetClusterInfo()
1481
      cluster.cluster_name = clustername
1482
      cluster.master_ip = ip
1483
      self.cfg.Update(cluster)
1484

    
1485
      # update the known hosts file
1486
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1487
      node_list = self.cfg.GetNodeList()
1488
      try:
1489
        node_list.remove(master)
1490
      except ValueError:
1491
        pass
1492
      result = self.rpc.call_upload_file(node_list,
1493
                                         constants.SSH_KNOWN_HOSTS_FILE)
1494
      for to_node, to_result in result.iteritems():
1495
        if to_result.failed or not to_result.data:
1496
          logging.error("Copy of file %s to node %s failed",
1497
                        constants.SSH_KNOWN_HOSTS_FILE, to_node)
1498

    
1499
    finally:
1500
      result = self.rpc.call_node_start_master(master, False, False)
1501
      if result.failed or not result.data:
1502
        self.LogWarning("Could not re-enable the master role on"
1503
                        " the master, please restart manually.")
1504

    
1505

    
1506
def _RecursiveCheckIfLVMBased(disk):
1507
  """Check if the given disk or its children are lvm-based.
1508

1509
  @type disk: L{objects.Disk}
1510
  @param disk: the disk to check
1511
  @rtype: boolean
1512
  @return: boolean indicating whether an LD_LV dev_type was found or not
1513

1514
  """
1515
  if disk.children:
1516
    for chdisk in disk.children:
1517
      if _RecursiveCheckIfLVMBased(chdisk):
1518
        return True
1519
  return disk.dev_type == constants.LD_LV
1520
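# For example, a DRBD8 disk whose children are two logical volumes is
# reported as lvm-based, since the recursion reaches the LV children.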

    
1521

    
1522
class LUSetClusterParams(LogicalUnit):
1523
  """Change the parameters of the cluster.
1524

1525
  """
1526
  HPATH = "cluster-modify"
1527
  HTYPE = constants.HTYPE_CLUSTER
1528
  _OP_REQP = []
1529
  REQ_BGL = False
1530

    
1531
  def CheckArguments(self):
1532
    """Check parameters
1533

1534
    """
1535
    if not hasattr(self.op, "candidate_pool_size"):
1536
      self.op.candidate_pool_size = None
1537
    if self.op.candidate_pool_size is not None:
1538
      try:
1539
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1540
      except (ValueError, TypeError), err:
1541
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1542
                                   str(err))
1543
      if self.op.candidate_pool_size < 1:
1544
        raise errors.OpPrereqError("At least one master candidate needed")
1545

    
1546
  def ExpandNames(self):
1547
    # FIXME: in the future maybe other cluster params won't require checking on
1548
    # all nodes to be modified.
1549
    self.needed_locks = {
1550
      locking.LEVEL_NODE: locking.ALL_SET,
1551
    }
1552
    self.share_locks[locking.LEVEL_NODE] = 1
1553

    
1554
  def BuildHooksEnv(self):
1555
    """Build hooks env.
1556

1557
    """
1558
    env = {
1559
      "OP_TARGET": self.cfg.GetClusterName(),
1560
      "NEW_VG_NAME": self.op.vg_name,
1561
      }
1562
    mn = self.cfg.GetMasterNode()
1563
    return env, [mn], [mn]
1564

    
1565
  def CheckPrereq(self):
1566
    """Check prerequisites.
1567

1568
    This checks that the given parameters don't conflict and
1569
    that the given volume group is valid.
1570

1571
    """
1572
    if self.op.vg_name is not None and not self.op.vg_name:
1573
      instances = self.cfg.GetAllInstancesInfo().values()
1574
      for inst in instances:
1575
        for disk in inst.disks:
1576
          if _RecursiveCheckIfLVMBased(disk):
1577
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1578
                                       " lvm-based instances exist")
1579

    
1580
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1581

    
1582
    # if vg_name not None, checks given volume group on all nodes
1583
    if self.op.vg_name:
1584
      vglist = self.rpc.call_vg_list(node_list)
1585
      for node in node_list:
1586
        if vglist[node].failed:
1587
          # ignoring down node
1588
          self.LogWarning("Node %s unreachable/error, ignoring" % node)
1589
          continue
1590
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
1591
                                              self.op.vg_name,
1592
                                              constants.MIN_VG_SIZE)
1593
        if vgstatus:
1594
          raise errors.OpPrereqError("Error on node '%s': %s" %
1595
                                     (node, vgstatus))
1596

    
1597
    self.cluster = cluster = self.cfg.GetClusterInfo()
1598
    # validate beparams changes
1599
    if self.op.beparams:
1600
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1601
      self.new_beparams = cluster.FillDict(
1602
        cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
1603

    
1604
    # hypervisor list/parameters
1605
    self.new_hvparams = cluster.FillDict(cluster.hvparams, {})
1606
    if self.op.hvparams:
1607
      if not isinstance(self.op.hvparams, dict):
1608
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1609
      for hv_name, hv_dict in self.op.hvparams.items():
1610
        if hv_name not in self.new_hvparams:
1611
          self.new_hvparams[hv_name] = hv_dict
1612
        else:
1613
          self.new_hvparams[hv_name].update(hv_dict)
1614

    
1615
    if self.op.enabled_hypervisors is not None:
1616
      self.hv_list = self.op.enabled_hypervisors
1617
    else:
1618
      self.hv_list = cluster.enabled_hypervisors
1619

    
1620
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1621
      # either the enabled list has changed, or the parameters have, validate
1622
      for hv_name, hv_params in self.new_hvparams.items():
1623
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1624
            (self.op.enabled_hypervisors and
1625
             hv_name in self.op.enabled_hypervisors)):
1626
          # either this is a new hypervisor, or its parameters have changed
1627
          hv_class = hypervisor.GetHypervisor(hv_name)
1628
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1629
          hv_class.CheckParameterSyntax(hv_params)
1630
          _CheckHVParams(self, node_list, hv_name, hv_params)
1631

    
1632
  def Exec(self, feedback_fn):
1633
    """Change the parameters of the cluster.
1634

1635
    """
1636
    if self.op.vg_name is not None:
1637
      new_volume = self.op.vg_name
1638
      if not new_volume:
1639
        new_volume = None
1640
      if new_volume != self.cfg.GetVGName():
1641
        self.cfg.SetVGName(new_volume)
1642
      else:
1643
        feedback_fn("Cluster LVM configuration already in desired"
1644
                    " state, not changing")
1645
    if self.op.hvparams:
1646
      self.cluster.hvparams = self.new_hvparams
1647
    if self.op.enabled_hypervisors is not None:
1648
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1649
    if self.op.beparams:
1650
      self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
1651
    if self.op.candidate_pool_size is not None:
1652
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1653
      # we need to update the pool size here, otherwise the save will fail
1654
      _AdjustCandidatePool(self)
1655

    
1656
    self.cfg.Update(self.cluster)
1657

    
1658

    
1659
class LURedistributeConfig(NoHooksLU):
1660
  """Force the redistribution of cluster configuration.
1661

1662
  This is a very simple LU.
1663

1664
  """
1665
  _OP_REQP = []
1666
  REQ_BGL = False
1667

    
1668
  def ExpandNames(self):
1669
    self.needed_locks = {
1670
      locking.LEVEL_NODE: locking.ALL_SET,
1671
    }
1672
    self.share_locks[locking.LEVEL_NODE] = 1
1673

    
1674
  def CheckPrereq(self):
1675
    """Check prerequisites.
1676

1677
    """
1678

    
1679
  def Exec(self, feedback_fn):
1680
    """Redistribute the configuration.
1681

1682
    """
1683
    self.cfg.Update(self.cfg.GetClusterInfo())
1684

    
1685

    
1686
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1687
  """Sleep and poll for an instance's disk to sync.
1688

1689
  """
1690
  if not instance.disks:
1691
    return True
1692

    
1693
  if not oneshot:
1694
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1695

    
1696
  node = instance.primary_node
1697

    
1698
  for dev in instance.disks:
1699
    lu.cfg.SetDiskID(dev, node)
1700

    
1701
  retries = 0
1702
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1703
  while True:
1704
    max_time = 0
1705
    done = True
1706
    cumul_degraded = False
1707
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1708
    if rstats.failed or not rstats.data:
1709
      lu.LogWarning("Can't get any data from node %s", node)
1710
      retries += 1
1711
      if retries >= 10:
1712
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1713
                                 " aborting." % node)
1714
      time.sleep(6)
1715
      continue
1716
    rstats = rstats.data
1717
    retries = 0
1718
    for i, mstat in enumerate(rstats):
1719
      if mstat is None:
1720
        lu.LogWarning("Can't compute data for node %s/%s",
1721
                           node, instance.disks[i].iv_name)
1722
        continue
1723
      # we ignore the ldisk parameter
1724
      perc_done, est_time, is_degraded, _ = mstat
1725
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1726
      if perc_done is not None:
1727
        done = False
1728
        if est_time is not None:
1729
          rem_time = "%d estimated seconds remaining" % est_time
1730
          max_time = est_time
1731
        else:
1732
          rem_time = "no time estimate"
1733
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1734
                        (instance.disks[i].iv_name, perc_done, rem_time))
1735

    
1736
    # if we're done but degraded, let's do a few small retries, to
1737
    # make sure we see a stable and not transient situation; therefore
1738
    # we force restart of the loop
1739
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1740
      logging.info("Degraded disks found, %d retries left", degr_retries)
1741
      degr_retries -= 1
1742
      time.sleep(1)
1743
      continue
1744

    
1745
    if done or oneshot:
1746
      break
1747

    
1748
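    # wait for the shorter of one minute and the largest remaining-time
    # estimate before polling again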
    time.sleep(min(60, max_time))
1749

    
1750
  if done:
1751
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1752
  return not cumul_degraded
1753

    
1754

    
1755
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1756
  """Check that mirrors are not degraded.
1757

1758
  The ldisk parameter, if True, will change the test from the
1759
  is_degraded attribute (which represents overall non-ok status for
1760
  the device(s)) to the ldisk (representing the local storage status).
1761

1762
  """
1763
  lu.cfg.SetDiskID(dev, node)
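  # in the blockdev_find result payload, index 5 is the overall
  # is_degraded flag and index 6 is the local disk (ldisk) status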
1764
  if ldisk:
1765
    idx = 6
1766
  else:
1767
    idx = 5
1768

    
1769
  result = True
1770
  if on_primary or dev.AssembleOnSecondary():
1771
    rstats = lu.rpc.call_blockdev_find(node, dev)
1772
    msg = rstats.RemoteFailMsg()
1773
    if msg:
1774
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1775
      result = False
1776
    elif not rstats.payload:
1777
      lu.LogWarning("Can't find disk on node %s", node)
1778
      result = False
1779
    else:
1780
      result = result and (not rstats.payload[idx])
1781
  if dev.children:
1782
    for child in dev.children:
1783
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1784

    
1785
  return result
1786

    
1787

    
1788
class LUDiagnoseOS(NoHooksLU):
1789
  """Logical unit for OS diagnose/query.
1790

1791
  """
1792
  _OP_REQP = ["output_fields", "names"]
1793
  REQ_BGL = False
1794
  _FIELDS_STATIC = utils.FieldSet()
1795
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1796

    
1797
  def ExpandNames(self):
1798
    if self.op.names:
1799
      raise errors.OpPrereqError("Selective OS query not supported")
1800

    
1801
    _CheckOutputFields(static=self._FIELDS_STATIC,
1802
                       dynamic=self._FIELDS_DYNAMIC,
1803
                       selected=self.op.output_fields)
1804

    
1805
    # Lock all nodes, in shared mode
1806
    # Temporary removal of locks, should be reverted later
1807
    # TODO: reintroduce locks when they are lighter-weight
1808
    self.needed_locks = {}
1809
    #self.share_locks[locking.LEVEL_NODE] = 1
1810
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1811

    
1812
  def CheckPrereq(self):
1813
    """Check prerequisites.
1814

1815
    """
1816

    
1817
  @staticmethod
1818
  def _DiagnoseByOS(node_list, rlist):
1819
    """Remaps a per-node return list into an a per-os per-node dictionary
1820

1821
    @param node_list: a list with the names of all nodes
1822
    @param rlist: a map with node names as keys and OS objects as values
1823

1824
    @rtype: dict
1825
    @return: a dictionary with osnames as keys and as value another map, with
1826
        nodes as keys and list of OS objects as values, eg::
1827

1828
          {"debian-etch": {"node1": [<object>,...],
1829
                           "node2": [<object>,]}
1830
          }
1831

1832
    """
1833
    all_os = {}
1834
    # we build here the list of nodes that didn't fail the RPC (at RPC
1835
    # level), so that nodes with a non-responding node daemon don't
1836
    # make all OSes invalid
1837
    good_nodes = [node_name for node_name in rlist
1838
                  if not rlist[node_name].failed]
1839
    for node_name, nr in rlist.iteritems():
1840
      if nr.failed or not nr.data:
1841
        continue
1842
      for os_obj in nr.data:
1843
        if os_obj.name not in all_os:
1844
          # build a list of nodes for this os containing empty lists
1845
          # for each node in node_list
1846
          all_os[os_obj.name] = {}
1847
          for nname in good_nodes:
1848
            all_os[os_obj.name][nname] = []
1849
        all_os[os_obj.name][node_name].append(os_obj)
1850
    return all_os
1851

    
1852
  def Exec(self, feedback_fn):
1853
    """Compute the list of OSes.
1854

1855
    """
1856
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1857
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1858
    if node_data == False:
1859
      raise errors.OpExecError("Can't gather the list of OSes")
1860
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1861
    output = []
1862
    for os_name, os_data in pol.iteritems():
1863
      row = []
1864
      for field in self.op.output_fields:
1865
        if field == "name":
1866
          val = os_name
1867
        elif field == "valid":
1868
          val = utils.all([osl and osl[0] for osl in os_data.values()])
1869
        elif field == "node_status":
1870
          val = {}
1871
          for node_name, nos_list in os_data.iteritems():
1872
            val[node_name] = [(v.status, v.path) for v in nos_list]
1873
        else:
1874
          raise errors.ParameterError(field)
1875
        row.append(val)
1876
      output.append(row)
1877

    
1878
    return output
1879

    
1880

    
1881
class LURemoveNode(LogicalUnit):
1882
  """Logical unit for removing a node.
1883

1884
  """
1885
  HPATH = "node-remove"
1886
  HTYPE = constants.HTYPE_NODE
1887
  _OP_REQP = ["node_name"]
1888

    
1889
  def BuildHooksEnv(self):
1890
    """Build hooks env.
1891

1892
    This doesn't run on the target node in the pre phase as a failed
1893
    node would then be impossible to remove.
1894

1895
    """
1896
    env = {
1897
      "OP_TARGET": self.op.node_name,
1898
      "NODE_NAME": self.op.node_name,
1899
      }
1900
    all_nodes = self.cfg.GetNodeList()
1901
    all_nodes.remove(self.op.node_name)
1902
    return env, all_nodes, all_nodes
1903

    
1904
  def CheckPrereq(self):
1905
    """Check prerequisites.
1906

1907
    This checks:
1908
     - the node exists in the configuration
1909
     - it does not have primary or secondary instances
1910
     - it's not the master
1911

1912
    Any errors are signalled by raising errors.OpPrereqError.
1913

1914
    """
1915
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1916
    if node is None:
1917
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1918

    
1919
    instance_list = self.cfg.GetInstanceList()
1920

    
1921
    masternode = self.cfg.GetMasterNode()
1922
    if node.name == masternode:
1923
      raise errors.OpPrereqError("Node is the master node,"
1924
                                 " you need to failover first.")
1925

    
1926
    for instance_name in instance_list:
1927
      instance = self.cfg.GetInstanceInfo(instance_name)
1928
      if node.name in instance.all_nodes:
1929
        raise errors.OpPrereqError("Instance %s is still running on the node,"
1930
                                   " please remove first." % instance_name)
1931
    self.op.node_name = node.name
1932
    self.node = node
1933

    
1934
  def Exec(self, feedback_fn):
1935
    """Removes the node from the cluster.
1936

1937
    """
1938
    node = self.node
1939
    logging.info("Stopping the node daemon and removing configs from node %s",
1940
                 node.name)
1941

    
1942
    self.context.RemoveNode(node.name)
1943

    
1944
    self.rpc.call_node_leave_cluster(node.name)
1945

    
1946
    # Promote nodes to master candidate as needed
1947
    _AdjustCandidatePool(self)
1948

    
1949

    
1950
class LUQueryNodes(NoHooksLU):
1951
  """Logical unit for querying nodes.
1952

1953
  """
1954
  _OP_REQP = ["output_fields", "names", "use_locking"]
1955
  REQ_BGL = False
1956
  _FIELDS_DYNAMIC = utils.FieldSet(
1957
    "dtotal", "dfree",
1958
    "mtotal", "mnode", "mfree",
1959
    "bootid",
1960
    "ctotal", "cnodes", "csockets",
1961
    )
1962

    
1963
  _FIELDS_STATIC = utils.FieldSet(
1964
    "name", "pinst_cnt", "sinst_cnt",
1965
    "pinst_list", "sinst_list",
1966
    "pip", "sip", "tags",
1967
    "serial_no",
1968
    "master_candidate",
1969
    "master",
1970
    "offline",
1971
    "drained",
1972
    "role",
1973
    )
1974

    
1975
  def ExpandNames(self):
1976
    _CheckOutputFields(static=self._FIELDS_STATIC,
1977
                       dynamic=self._FIELDS_DYNAMIC,
1978
                       selected=self.op.output_fields)
1979

    
1980
    self.needed_locks = {}
1981
    self.share_locks[locking.LEVEL_NODE] = 1
1982

    
1983
    if self.op.names:
1984
      self.wanted = _GetWantedNodes(self, self.op.names)
1985
    else:
1986
      self.wanted = locking.ALL_SET
1987

    
1988
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
1989
    self.do_locking = self.do_node_query and self.op.use_locking
1990
    if self.do_locking:
1991
      # if we don't request only static fields, we need to lock the nodes
1992
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
1993

    
1994

    
1995
  def CheckPrereq(self):
1996
    """Check prerequisites.
1997

1998
    """
1999
    # The node list is validated in _GetWantedNodes if it is non-empty;
2000
    # if it is empty, there is nothing to validate
2001
    pass
2002

    
2003
  def Exec(self, feedback_fn):
2004
    """Computes the list of nodes and their attributes.
2005

2006
    """
2007
    all_info = self.cfg.GetAllNodesInfo()
2008
    if self.do_locking:
2009
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2010
    elif self.wanted != locking.ALL_SET:
2011
      nodenames = self.wanted
2012
      missing = set(nodenames).difference(all_info.keys())
2013
      if missing:
2014
        raise errors.OpExecError(
2015
          "Some nodes were removed before retrieving their data: %s" % missing)
2016
    else:
2017
      nodenames = all_info.keys()
2018

    
2019
    nodenames = utils.NiceSort(nodenames)
2020
    nodelist = [all_info[name] for name in nodenames]
2021

    
2022
    # begin data gathering
2023

    
2024
    if self.do_node_query:
2025
      live_data = {}
2026
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2027
                                          self.cfg.GetHypervisorType())
2028
      for name in nodenames:
2029
        nodeinfo = node_data[name]
2030
        if not nodeinfo.failed and nodeinfo.data:
2031
          nodeinfo = nodeinfo.data
2032
          fn = utils.TryConvert
2033
          live_data[name] = {
2034
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2035
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2036
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2037
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2038
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2039
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2040
            "bootid": nodeinfo.get('bootid', None),
2041
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2042
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2043
            }
2044
        else:
2045
          live_data[name] = {}
2046
    else:
2047
      live_data = dict.fromkeys(nodenames, {})
2048

    
2049
    node_to_primary = dict([(name, set()) for name in nodenames])
2050
    node_to_secondary = dict([(name, set()) for name in nodenames])
2051

    
2052
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2053
                             "sinst_cnt", "sinst_list"))
2054
    if inst_fields & frozenset(self.op.output_fields):
2055
      instancelist = self.cfg.GetInstanceList()
2056

    
2057
      for instance_name in instancelist:
2058
        inst = self.cfg.GetInstanceInfo(instance_name)
2059
        if inst.primary_node in node_to_primary:
2060
          node_to_primary[inst.primary_node].add(inst.name)
2061
        for secnode in inst.secondary_nodes:
2062
          if secnode in node_to_secondary:
2063
            node_to_secondary[secnode].add(inst.name)
2064

    
2065
    master_node = self.cfg.GetMasterNode()
2066

    
2067
    # end data gathering
2068

    
2069
    output = []
2070
    for node in nodelist:
2071
      node_output = []
2072
      for field in self.op.output_fields:
2073
        if field == "name":
2074
          val = node.name
2075
        elif field == "pinst_list":
2076
          val = list(node_to_primary[node.name])
2077
        elif field == "sinst_list":
2078
          val = list(node_to_secondary[node.name])
2079
        elif field == "pinst_cnt":
2080
          val = len(node_to_primary[node.name])
2081
        elif field == "sinst_cnt":
2082
          val = len(node_to_secondary[node.name])
2083
        elif field == "pip":
2084
          val = node.primary_ip
2085
        elif field == "sip":
2086
          val = node.secondary_ip
2087
        elif field == "tags":
2088
          val = list(node.GetTags())
2089
        elif field == "serial_no":
2090
          val = node.serial_no
2091
        elif field == "master_candidate":
2092
          val = node.master_candidate
2093
        elif field == "master":
2094
          val = node.name == master_node
2095
        elif field == "offline":
2096
          val = node.offline
2097
        elif field == "drained":
2098
          val = node.drained
2099
        elif self._FIELDS_DYNAMIC.Matches(field):
2100
          val = live_data[node.name].get(field, None)
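        # the "role" field is a one-letter code: M=master, C=candidate,
        # D=drained, O=offline, R=regular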
2101
        elif field == "role":
2102
          if node.name == master_node:
2103
            val = "M"
2104
          elif node.master_candidate:
2105
            val = "C"
2106
          elif node.drained:
2107
            val = "D"
2108
          elif node.offline:
2109
            val = "O"
2110
          else:
2111
            val = "R"
2112
        else:
2113
          raise errors.ParameterError(field)
2114
        node_output.append(val)
2115
      output.append(node_output)
2116

    
2117
    return output
2118

    
2119

    
2120
class LUQueryNodeVolumes(NoHooksLU):
2121
  """Logical unit for getting volumes on node(s).
2122

2123
  """
2124
  _OP_REQP = ["nodes", "output_fields"]
2125
  REQ_BGL = False
2126
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2127
  _FIELDS_STATIC = utils.FieldSet("node")
2128

    
2129
  def ExpandNames(self):
2130
    _CheckOutputFields(static=self._FIELDS_STATIC,
2131
                       dynamic=self._FIELDS_DYNAMIC,
2132
                       selected=self.op.output_fields)
2133

    
2134
    self.needed_locks = {}
2135
    self.share_locks[locking.LEVEL_NODE] = 1
2136
    if not self.op.nodes:
2137
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2138
    else:
2139
      self.needed_locks[locking.LEVEL_NODE] = \
2140
        _GetWantedNodes(self, self.op.nodes)
2141

    
2142
  def CheckPrereq(self):
2143
    """Check prerequisites.
2144

2145
    This checks that the fields required are valid output fields.
2146

2147
    """
2148
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2149

    
2150
  def Exec(self, feedback_fn):
2151
    """Computes the list of nodes and their attributes.
2152

2153
    """
2154
    nodenames = self.nodes
2155
    volumes = self.rpc.call_node_volumes(nodenames)
2156

    
2157
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2158
             in self.cfg.GetInstanceList()]
2159

    
2160
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2161

    
2162
    output = []
2163
    for node in nodenames:
2164
      if node not in volumes or volumes[node].failed or not volumes[node].data:
2165
        continue
2166

    
2167
      node_vols = volumes[node].data[:]
2168
      node_vols.sort(key=lambda vol: vol['dev'])
2169

    
2170
      for vol in node_vols:
2171
        node_output = []
2172
        for field in self.op.output_fields:
2173
          if field == "node":
2174
            val = node
2175
          elif field == "phys":
2176
            val = vol['dev']
2177
          elif field == "vg":
2178
            val = vol['vg']
2179
          elif field == "name":
2180
            val = vol['name']
2181
          elif field == "size":
2182
            val = int(float(vol['size']))
2183
          elif field == "instance":
2184
            for inst in ilist:
2185
              if node not in lv_by_node[inst]:
2186
                continue
2187
              if vol['name'] in lv_by_node[inst][node]:
2188
                val = inst.name
2189
                break
2190
            else:
2191
              val = '-'
2192
          else:
2193
            raise errors.ParameterError(field)
2194
          node_output.append(str(val))
2195

    
2196
        output.append(node_output)
2197

    
2198
    return output
2199

    
2200

    
2201
class LUAddNode(LogicalUnit):
2202
  """Logical unit for adding node to the cluster.
2203

2204
  """
2205
  HPATH = "node-add"
2206
  HTYPE = constants.HTYPE_NODE
2207
  _OP_REQP = ["node_name"]
2208

    
2209
  def BuildHooksEnv(self):
2210
    """Build hooks env.
2211

2212
    This will run on all nodes before, and on all nodes + the new node after.
2213

2214
    """
2215
    env = {
2216
      "OP_TARGET": self.op.node_name,
2217
      "NODE_NAME": self.op.node_name,
2218
      "NODE_PIP": self.op.primary_ip,
2219
      "NODE_SIP": self.op.secondary_ip,
2220
      }
2221
    nodes_0 = self.cfg.GetNodeList()
2222
    nodes_1 = nodes_0 + [self.op.node_name, ]
2223
    return env, nodes_0, nodes_1
2224

    
2225
  def CheckPrereq(self):
2226
    """Check prerequisites.
2227

2228
    This checks:
2229
     - the new node is not already in the config
2230
     - it is resolvable
2231
     - its parameters (single/dual homed) matches the cluster
2232

2233
    Any errors are signalled by raising errors.OpPrereqError.
2234

2235
    """
2236
    node_name = self.op.node_name
2237
    cfg = self.cfg
2238

    
2239
    dns_data = utils.HostInfo(node_name)
2240

    
2241
    node = dns_data.name
2242
    primary_ip = self.op.primary_ip = dns_data.ip
2243
    secondary_ip = getattr(self.op, "secondary_ip", None)
2244
    if secondary_ip is None:
2245
      secondary_ip = primary_ip
2246
    if not utils.IsValidIP(secondary_ip):
2247
      raise errors.OpPrereqError("Invalid secondary IP given")
2248
    self.op.secondary_ip = secondary_ip
2249

    
2250
    node_list = cfg.GetNodeList()
2251
    if not self.op.readd and node in node_list:
2252
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2253
                                 node)
2254
    elif self.op.readd and node not in node_list:
2255
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2256

    
2257
    for existing_node_name in node_list:
2258
      existing_node = cfg.GetNodeInfo(existing_node_name)
2259

    
2260
      if self.op.readd and node == existing_node_name:
2261
        if (existing_node.primary_ip != primary_ip or
2262
            existing_node.secondary_ip != secondary_ip):
2263
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2264
                                     " address configuration as before")
2265
        continue
2266

    
2267
      if (existing_node.primary_ip == primary_ip or
2268
          existing_node.secondary_ip == primary_ip or
2269
          existing_node.primary_ip == secondary_ip or
2270
          existing_node.secondary_ip == secondary_ip):
2271
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2272
                                   " existing node %s" % existing_node.name)
2273

    
2274
    # check that the type of the node (single versus dual homed) is the
2275
    # same as for the master
2276
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2277
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2278
    newbie_singlehomed = secondary_ip == primary_ip
2279
    if master_singlehomed != newbie_singlehomed:
2280
      if master_singlehomed:
2281
        raise errors.OpPrereqError("The master has no private ip but the"
2282
                                   " new node has one")
2283
      else:
2284
        raise errors.OpPrereqError("The master has a private ip but the"
2285
                                   " new node doesn't have one")
2286

    
2287
    # checks reachability
2288
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2289
      raise errors.OpPrereqError("Node not reachable by ping")
2290

    
2291
    if not newbie_singlehomed:
2292
      # check reachability from my secondary ip to newbie's secondary ip
2293
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2294
                           source=myself.secondary_ip):
2295
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2296
                                   " based ping to noded port")
2297

    
2298
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2299
    if self.op.readd:
2300
      exceptions = [node]
2301
    else:
2302
      exceptions = []
2303
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2304
    # the new node will increase mc_max by one, so:
2305
    mc_max = min(mc_max + 1, cp_size)
2306
    self.master_candidate = mc_now < mc_max
2307

    
2308
    if self.op.readd:
2309
      self.new_node = self.cfg.GetNodeInfo(node)
2310
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2311
    else:
2312
      self.new_node = objects.Node(name=node,
2313
                                   primary_ip=primary_ip,
2314
                                   secondary_ip=secondary_ip,
2315
                                   master_candidate=self.master_candidate,
2316
                                   offline=False, drained=False)
2317

    
2318
  def Exec(self, feedback_fn):
2319
    """Adds the new node to the cluster.
2320

2321
    """
2322
    new_node = self.new_node
2323
    node = new_node.name
2324

    
2325
    # for re-adds, reset the offline/drained/master-candidate flags;
2326
    # we need to reset here, otherwise offline would prevent RPC calls
2327
    # later in the procedure; this also means that if the re-add
2328
    # fails, we are left with a non-offlined, broken node
2329
    if self.op.readd:
2330
      new_node.drained = new_node.offline = False
2331
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2332
      # if we demote the node, we do cleanup later in the procedure
2333
      new_node.master_candidate = self.master_candidate
2334

    
2335
    # notify the user about any possible mc promotion
2336
    if new_node.master_candidate:
2337
      self.LogInfo("Node will be a master candidate")
2338

    
2339
    # check connectivity
2340
    result = self.rpc.call_version([node])[node]
2341
    result.Raise()
2342
    if result.data:
2343
      if constants.PROTOCOL_VERSION == result.data:
2344
        logging.info("Communication to node %s fine, sw version %s match",
2345
                     node, result.data)
2346
      else:
2347
        raise errors.OpExecError("Version mismatch master version %s,"
2348
                                 " node version %s" %
2349
                                 (constants.PROTOCOL_VERSION, result.data))
2350
    else:
2351
      raise errors.OpExecError("Cannot get version from the new node")
2352

    
2353
    # setup ssh on node
2354
    logging.info("Copy ssh key to node %s", node)
2355
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2356
    keyarray = []
2357
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2358
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2359
                priv_key, pub_key]
2360

    
2361
    for i in keyfiles:
2362
      f = open(i, 'r')
2363
      try:
2364
        keyarray.append(f.read())
2365
      finally:
2366
        f.close()
2367

    
2368
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2369
                                    keyarray[2],
2370
                                    keyarray[3], keyarray[4], keyarray[5])
2371

    
2372
    msg = result.RemoteFailMsg()
2373
    if msg:
2374
      raise errors.OpExecError("Cannot transfer ssh keys to the"
2375
                               " new node: %s" % msg)
2376

    
2377
    # Add node to our /etc/hosts, and add key to known_hosts
2378
    utils.AddHostToEtcHosts(new_node.name)
2379

    
2380
    if new_node.secondary_ip != new_node.primary_ip:
2381
      result = self.rpc.call_node_has_ip_address(new_node.name,
2382
                                                 new_node.secondary_ip)
2383
      if result.failed or not result.data:
2384
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2385
                                 " you gave (%s). Please fix and re-run this"
2386
                                 " command." % new_node.secondary_ip)
2387

    
2388
    node_verify_list = [self.cfg.GetMasterNode()]
2389
    node_verify_param = {
2390
      'nodelist': [node],
2391
      # TODO: do a node-net-test as well?
2392
    }
2393

    
2394
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2395
                                       self.cfg.GetClusterName())
2396
    for verifier in node_verify_list:
2397
      if result[verifier].failed or not result[verifier].data:
2398
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
2399
                                 " for remote verification" % verifier)
2400
      if result[verifier].data['nodelist']:
2401
        for failed in result[verifier].data['nodelist']:
2402
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2403
                      (verifier, result[verifier].data['nodelist'][failed]))
2404
        raise errors.OpExecError("ssh/hostname verification failed.")
2405

    
2406
    # Distribute updated /etc/hosts and known_hosts to all nodes,
2407
    # including the node just added
2408
    myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
2409
    dist_nodes = self.cfg.GetNodeList()
2410
    if not self.op.readd:
2411
      dist_nodes.append(node)
2412
    if myself.name in dist_nodes:
2413
      dist_nodes.remove(myself.name)
2414

    
2415
    logging.debug("Copying hosts and known_hosts to all nodes")
2416
    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
2417
      result = self.rpc.call_upload_file(dist_nodes, fname)
2418
      for to_node, to_result in result.iteritems():
2419
        if to_result.failed or not to_result.data:
2420
          logging.error("Copy of file %s to node %s failed", fname, to_node)
2421

    
2422
    to_copy = []
2423
    enabled_hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2424
    if constants.HTS_COPY_VNC_PASSWORD.intersection(enabled_hypervisors):
2425
      to_copy.append(constants.VNC_PASSWORD_FILE)
2426

    
2427
    for fname in to_copy:
2428
      result = self.rpc.call_upload_file([node], fname)
2429
      if result[node].failed or not result[node]:
2430
        logging.error("Could not copy file %s to node %s", fname, node)
2431

    
2432
    if self.op.readd:
2433
      self.context.ReaddNode(new_node)
2434
      # make sure we redistribute the config
2435
      self.cfg.Update(new_node)
2436
      # and make sure the new node will not have old files around
2437
      if not new_node.master_candidate:
2438
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2439
        msg = result.RemoteFailMsg()
2440
        if msg:
2441
          self.LogWarning("Node failed to demote itself from master"
2442
                          " candidate status: %s" % msg)
2443
    else:
2444
      self.context.AddNode(new_node)
2445

    
2446

    
2447
class LUSetNodeParams(LogicalUnit):
2448
  """Modifies the parameters of a node.
2449

2450
  """
2451
  HPATH = "node-modify"
2452
  HTYPE = constants.HTYPE_NODE
2453
  _OP_REQP = ["node_name"]
2454
  REQ_BGL = False
2455

    
2456
  def CheckArguments(self):
2457
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2458
    if node_name is None:
2459
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2460
    self.op.node_name = node_name
2461
    _CheckBooleanOpField(self.op, 'master_candidate')
2462
    _CheckBooleanOpField(self.op, 'offline')
2463
    _CheckBooleanOpField(self.op, 'drained')
2464
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2465
    if all_mods.count(None) == 3:
2466
      raise errors.OpPrereqError("Please pass at least one modification")
2467
    if all_mods.count(True) > 1:
2468
      raise errors.OpPrereqError("Can't set the node into more than one"
2469
                                 " state at the same time")
2470

    
2471
  def ExpandNames(self):
2472
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2473

    
2474
  def BuildHooksEnv(self):
2475
    """Build hooks env.
2476

2477
    This runs on the master node.
2478

2479
    """
2480
    env = {
2481
      "OP_TARGET": self.op.node_name,
2482
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2483
      "OFFLINE": str(self.op.offline),
2484
      "DRAINED": str(self.op.drained),
2485
      }
2486
    nl = [self.cfg.GetMasterNode(),
2487
          self.op.node_name]
2488
    return env, nl, nl
2489

    
2490
  def CheckPrereq(self):
2491
    """Check prerequisites.
2492

2493
    This checks the requested node state changes for consistency.
2494

2495
    """
2496
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2497

    
2498
    if ((self.op.master_candidate == False or self.op.offline == True or
2499
         self.op.drained == True) and node.master_candidate):
2500
      # we will demote the node from master_candidate
2501
      if self.op.node_name == self.cfg.GetMasterNode():
2502
        raise errors.OpPrereqError("The master node has to be a"
2503
                                   " master candidate, online and not drained")
2504
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2505
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2506
      if num_candidates <= cp_size:
2507
        msg = ("Not enough master candidates (desired"
2508
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2509
        if self.op.force:
2510
          self.LogWarning(msg)
2511
        else:
2512
          raise errors.OpPrereqError(msg)
2513

    
2514
    if (self.op.master_candidate == True and
2515
        ((node.offline and not self.op.offline == False) or
2516
         (node.drained and not self.op.drained == False))):
2517
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2518
                                 " to master_candidate" % node.name)
2519

    
2520
    return
2521

    
2522
  def Exec(self, feedback_fn):
2523
    """Modifies a node.
2524

2525
    """
2526
    node = self.node
2527

    
2528
    result = []
2529
    changed_mc = False
2530

    
2531
    if self.op.offline is not None:
2532
      node.offline = self.op.offline
2533
      result.append(("offline", str(self.op.offline)))
2534
      if self.op.offline == True:
2535
        if node.master_candidate:
2536
          node.master_candidate = False
2537
          changed_mc = True
2538
          result.append(("master_candidate", "auto-demotion due to offline"))
2539
        if node.drained:
2540
          node.drained = False
2541
          result.append(("drained", "clear drained status due to offline"))
2542

    
2543
    if self.op.master_candidate is not None:
2544
      node.master_candidate = self.op.master_candidate
2545
      changed_mc = True
2546
      result.append(("master_candidate", str(self.op.master_candidate)))
2547
      if self.op.master_candidate == False:
2548
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2549
        msg = rrc.RemoteFailMsg()
2550
        if msg:
2551
          self.LogWarning("Node failed to demote itself: %s" % msg)
2552

    
2553
    if self.op.drained is not None:
2554
      node.drained = self.op.drained
2555
      result.append(("drained", str(self.op.drained)))
2556
      if self.op.drained == True:
2557
        if node.master_candidate:
2558
          node.master_candidate = False
2559
          changed_mc = True
2560
          result.append(("master_candidate", "auto-demotion due to drain"))
2561
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2562
          msg = rrc.RemoteFailMsg()
2563
          if msg:
2564
            self.LogWarning("Node failed to demote itself: %s" % msg)
2565
        if node.offline:
2566
          node.offline = False
2567
          result.append(("offline", "clear offline status due to drain"))
2568

    
2569
    # this will trigger configuration file update, if needed
2570
    self.cfg.Update(node)
2571
    # this will trigger job queue propagation or cleanup
2572
    if changed_mc:
2573
      self.context.ReaddNode(node)
2574

    
2575
    return result
2576

    
2577

    
2578
class LUQueryClusterInfo(NoHooksLU):
2579
  """Query cluster configuration.
2580

2581
  """
2582
  _OP_REQP = []
2583
  REQ_BGL = False
2584

    
2585
  def ExpandNames(self):
2586
    self.needed_locks = {}
2587

    
2588
  def CheckPrereq(self):
2589
    """No prerequsites needed for this LU.
2590

2591
    """
2592
    pass
2593

    
2594
  def Exec(self, feedback_fn):
2595
    """Return cluster config.
2596

2597
    """
2598
    cluster = self.cfg.GetClusterInfo()
2599
    result = {
2600
      "software_version": constants.RELEASE_VERSION,
2601
      "protocol_version": constants.PROTOCOL_VERSION,
2602
      "config_version": constants.CONFIG_VERSION,
2603
      "os_api_version": constants.OS_API_VERSION,
2604
      "export_version": constants.EXPORT_VERSION,
2605
      "architecture": (platform.architecture()[0], platform.machine()),
2606
      "name": cluster.cluster_name,
2607
      "master": cluster.master_node,
2608
      "default_hypervisor": cluster.default_hypervisor,
2609
      "enabled_hypervisors": cluster.enabled_hypervisors,
2610
      "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
2611
                        for hypervisor in cluster.enabled_hypervisors]),
2612
      "beparams": cluster.beparams,
2613
      "candidate_pool_size": cluster.candidate_pool_size,
2614
      "default_bridge": cluster.default_bridge,
2615
      "master_netdev": cluster.master_netdev,
2616
      "volume_group_name": cluster.volume_group_name,
2617
      "file_storage_dir": cluster.file_storage_dir,
2618
      }
2619

    
2620
    return result
2621

    
2622

    
2623
class LUQueryConfigValues(NoHooksLU):
2624
  """Return configuration values.
2625

2626
  """
2627
  _OP_REQP = []
2628
  REQ_BGL = False
2629
  _FIELDS_DYNAMIC = utils.FieldSet()
2630
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2631

    
2632
  def ExpandNames(self):
2633
    self.needed_locks = {}
2634

    
2635
    _CheckOutputFields(static=self._FIELDS_STATIC,
2636
                       dynamic=self._FIELDS_DYNAMIC,
2637
                       selected=self.op.output_fields)
2638

    
2639
  def CheckPrereq(self):
2640
    """No prerequisites.
2641

2642
    """
2643
    pass
2644

    
2645
  def Exec(self, feedback_fn):
2646
    """Dump a representation of the cluster config to the standard output.
2647

2648
    """
2649
    values = []
2650
    for field in self.op.output_fields:
2651
      if field == "cluster_name":
2652
        entry = self.cfg.GetClusterName()
2653
      elif field == "master_node":
2654
        entry = self.cfg.GetMasterNode()
2655
      elif field == "drain_flag":
2656
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2657
      else:
2658
        raise errors.ParameterError(field)
2659
      values.append(entry)
2660
    return values
2661

    
2662

    
2663
class LUActivateInstanceDisks(NoHooksLU):
2664
  """Bring up an instance's disks.
2665

2666
  """
2667
  _OP_REQP = ["instance_name"]
2668
  REQ_BGL = False
2669

    
2670
  def ExpandNames(self):
2671
    self._ExpandAndLockInstance()
2672
    self.needed_locks[locking.LEVEL_NODE] = []
2673
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2674

    
2675
  def DeclareLocks(self, level):
2676
    if level == locking.LEVEL_NODE:
2677
      self._LockInstancesNodes()
2678

    
2679
  def CheckPrereq(self):
2680
    """Check prerequisites.
2681

2682
    This checks that the instance is in the cluster.
2683

2684
    """
2685
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2686
    assert self.instance is not None, \
2687
      "Cannot retrieve locked instance %s" % self.op.instance_name
2688
    _CheckNodeOnline(self, self.instance.primary_node)
2689
    if not hasattr(self.op, "ignore_size"):
2690
      self.op.ignore_size = False
2691

    
2692
  def Exec(self, feedback_fn):
2693
    """Activate the disks.
2694

2695
    """
2696
    disks_ok, disks_info = \
2697
              _AssembleInstanceDisks(self, self.instance,
2698
                                     ignore_size=self.op.ignore_size)
2699
    if not disks_ok:
2700
      raise errors.OpExecError("Cannot activate block devices")
2701

    
2702
    return disks_info
2703

    
2704

    
2705
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
2706
                           ignore_size=False):
2707
  """Prepare the block devices for an instance.
2708

2709
  This sets up the block devices on all nodes.
2710

2711
  @type lu: L{LogicalUnit}
2712
  @param lu: the logical unit on whose behalf we execute
2713
  @type instance: L{objects.Instance}
2714
  @param instance: the instance for whose disks we assemble
2715
  @type ignore_secondaries: boolean
2716
  @param ignore_secondaries: if true, errors on secondary nodes
2717
      won't result in an error return from the function
2718
  @type ignore_size: boolean
2719
  @param ignore_size: if true, the current known size of the disk
2720
      will not be used during the disk activation, useful for cases
2721
      when the size is wrong
2722
  @return: a tuple of (disks_ok, device_info), where device_info is a
2723
      list of (host, instance_visible_name, node_visible_name) tuples
2724
      with the mapping from node devices to instance devices
2725

2726
  """
2727
  device_info = []
2728
  disks_ok = True
2729
  iname = instance.name
2730
  # With the two-pass mechanism we try to reduce the window of
2731
  # opportunity for the race condition of switching DRBD to primary
2732
  # before handshaking occurred, but we do not eliminate it
2733

    
2734
  # The proper fix would be to wait (with some limits) until the
2735
  # connection has been made and drbd transitions from WFConnection
2736
  # into any other network-connected state (Connected, SyncTarget,
2737
  # SyncSource, etc.)
2738

    
2739
  # 1st pass, assemble on all nodes in secondary mode
2740
  for inst_disk in instance.disks:
2741
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2742
      if ignore_size:
2743
        node_disk = node_disk.Copy()
2744
        node_disk.UnsetSize()
2745
      lu.cfg.SetDiskID(node_disk, node)
2746
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2747
      msg = result.RemoteFailMsg()
2748
      if msg:
2749
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2750
                           " (is_primary=False, pass=1): %s",
2751
                           inst_disk.iv_name, node, msg)
2752
        if not ignore_secondaries:
2753
          disks_ok = False
2754

    
2755
  # FIXME: race condition on drbd migration to primary
2756

    
2757
  # 2nd pass, do only the primary node
2758
  for inst_disk in instance.disks:
2759
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2760
      if node != instance.primary_node:
2761
        continue
2762
      if ignore_size:
2763
        node_disk = node_disk.Copy()
2764
        node_disk.UnsetSize()
2765
      lu.cfg.SetDiskID(node_disk, node)
2766
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2767
      msg = result.RemoteFailMsg()
2768
      if msg:
2769
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2770
                           " (is_primary=True, pass=2): %s",
2771
                           inst_disk.iv_name, node, msg)
2772
        disks_ok = False
2773
    device_info.append((instance.primary_node, inst_disk.iv_name,
2774
                        result.payload))
2775

    
2776
  # leave the disks configured for the primary node
2777
  # this is a workaround that would be fixed better by
2778
  # improving the logical/physical id handling
2779
  for disk in instance.disks:
2780
    lu.cfg.SetDiskID(disk, instance.primary_node)
2781

    
2782
  return disks_ok, device_info
2783

    
2784

    
2785
def _StartInstanceDisks(lu, instance, force):
2786
  """Start the disks of an instance.
2787

2788
  """
2789
  disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
2790
                                           ignore_secondaries=force)
2791
  if not disks_ok:
2792
    _ShutdownInstanceDisks(lu, instance)
2793
    if force is not None and not force:
2794
      lu.proc.LogWarning("", hint="If the message above refers to a"
2795
                         " secondary node,"
2796
                         " you can retry the operation using '--force'.")
2797
    raise errors.OpExecError("Disk consistency error")
2798

    
2799

    
2800
class LUDeactivateInstanceDisks(NoHooksLU):
2801
  """Shutdown an instance's disks.
2802

2803
  """
2804
  _OP_REQP = ["instance_name"]
2805
  REQ_BGL = False
2806

    
2807
  def ExpandNames(self):
2808
    self._ExpandAndLockInstance()
2809
    self.needed_locks[locking.LEVEL_NODE] = []
2810
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2811

    
2812
  def DeclareLocks(self, level):
2813
    if level == locking.LEVEL_NODE:
2814
      self._LockInstancesNodes()
2815

    
2816
  def CheckPrereq(self):
2817
    """Check prerequisites.
2818

2819
    This checks that the instance is in the cluster.
2820

2821
    """
2822
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2823
    assert self.instance is not None, \
2824
      "Cannot retrieve locked instance %s" % self.op.instance_name
2825

    
2826
  def Exec(self, feedback_fn):
2827
    """Deactivate the disks
2828

2829
    """
2830
    instance = self.instance
2831
    _SafeShutdownInstanceDisks(self, instance)
2832

    
2833

    
2834
def _SafeShutdownInstanceDisks(lu, instance):
2835
  """Shutdown block devices of an instance.
2836

2837
  This function checks if an instance is running, before calling
2838
  _ShutdownInstanceDisks.
2839

2840
  """
2841
  ins_l = lu.rpc.call_instance_list([instance.primary_node],
2842
                                      [instance.hypervisor])
2843
  ins_l = ins_l[instance.primary_node]
2844
  if ins_l.failed or not isinstance(ins_l.data, list):
2845
    raise errors.OpExecError("Can't contact node '%s'" %
2846
                             instance.primary_node)
2847

    
2848
  if instance.name in ins_l.data:
2849
    raise errors.OpExecError("Instance is running, can't shutdown"
2850
                             " block devices.")
2851

    
2852
  _ShutdownInstanceDisks(lu, instance)
2853

    
2854

    
2855
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2856
  """Shutdown block devices of an instance.
2857

2858
  This does the shutdown on all nodes of the instance.
2859

2860
  If ignore_primary is false, errors on the primary node are not
2861
  ignored (they cause the shutdown to be reported as failed).
2862

2863
  """
2864
  all_result = True
2865
  for disk in instance.disks:
2866
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2867
      lu.cfg.SetDiskID(top_disk, node)
2868
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
2869
      msg = result.RemoteFailMsg()
2870
      if msg:
2871
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
2872
                      disk.iv_name, node, msg)
2873
        if not ignore_primary or node != instance.primary_node:
2874
          all_result = False
2875
  return all_result
2876

    
2877

    
2878
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2879
  """Checks if a node has enough free memory.
2880

2881
  This function checks if a given node has the needed amount of free
2882
  memory. In case the node has less memory or we cannot get the
2883
  information from the node, this function raises an OpPrereqError
2884
  exception.
2885

2886
  @type lu: C{LogicalUnit}
2887
  @param lu: a logical unit from which we get configuration data
2888
  @type node: C{str}
2889
  @param node: the node to check
2890
  @type reason: C{str}
2891
  @param reason: string to use in the error message
2892
  @type requested: C{int}
2893
  @param requested: the amount of memory in MiB to check for
2894
  @type hypervisor_name: C{str}
2895
  @param hypervisor_name: the hypervisor to ask for memory stats
2896
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2897
      we cannot check the node
2898

2899
  """
2900
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
2901
  nodeinfo[node].Raise()
2902
  free_mem = nodeinfo[node].data.get('memory_free')
2903
  if not isinstance(free_mem, int):
2904
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2905
                             " was '%s'" % (node, free_mem))
2906
  if requested > free_mem:
2907
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2908
                             " needed %s MiB, available %s MiB" %
2909
                             (node, reason, requested, free_mem))
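
# Usage sketch (editorial, mirroring the callers later in this module): LUs
# that are about to start or move an instance call this helper with the
# memory requirement taken from the instance's backend parameters, e.g.
#   _CheckNodeFreeMemory(self, target_node,
#                        "failing over instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# and rely on the raised OpPrereqError to abort before any state is changed.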


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False
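
  # Editorial note: besides the required parameters above, this LU also
  # accepts optional "beparams" and "hvparams" dicts on the opcode (see
  # CheckPrereq below); they are validated and then passed straight to the
  # instance_start RPC in Exec, so they appear to act as one-off overrides
  # for this start rather than permanent configuration changes.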

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ))
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ))

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise()
    if not remote_info.data:
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.RemoteFailMsg()
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Could not reboot instance: %s" % msg)
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Could not shutdown instance for"
                                 " full reboot: %s" % msg)
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.RemoteFailMsg()
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance)
    msg = result.RemoteFailMsg()
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise()
    if remote_info.data:
      raise errors.OpPrereqError("Instance '%s' is running on node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   self.op.pnode)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise()
      if not isinstance(result.data, objects.OS):
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
                                   " primary node" % self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Could not install OS for instance %s"
                                 " on node %s: %s" %
                                 (inst.name, inst.primary_node, msg))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise()
    if remote_info.data:
      raise errors.OpPrereqError("Instance '%s' is running on node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Could not connect to node '%s' to rename"
                                 " directory '%s' to '%s' (but the instance"
                                 " has been renamed in Ganeti)" % (
                                 inst.primary_node, old_file_storage_dir,
                                 new_file_storage_dir))

      if not result.data[0]:
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
                                 " (but the instance has been renamed in"
                                 " Ganeti)" % (old_file_storage_dir,
                                               new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name)
      msg = result.RemoteFailMsg()
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
    msg = result.RemoteFailMsg()
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "serial_no", "hypervisor", "hvparams",] +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
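
  # Editorial note: fields in _FIELDS_STATIC can be answered from the cluster
  # configuration alone, while the _FIELDS_DYNAMIC ones need live data from
  # the nodes; ExpandNames below uses this split to decide whether node RPCs
  # (and, optionally, locking) are required at all for a given query.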

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.failed:
          bad_nodes.append(name)
        else:
          if result.data:
            live_data.update(result.data)
            # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    for instance in instance_list:
      iout = []
      i_hv = self.cfg.GetClusterInfo().FillHV(instance)
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "bridge":
          if instance.nics:
            val = instance.nics[0].bridge
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "serial_no":
          val = instance.serial_no
        elif field == "network_port":
          val = instance.network_port
        elif field == "hypervisor":
          val = instance.hypervisor
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "bridges":
              val = [nic.bridge for nic in instance.nics]
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "bridge":
                  val = instance.nics[nic_idx].bridge
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    brlist = [nic.bridge for nic in instance.nics]
    result = self.rpc.call_bridges_exist(target_node, brlist)
    result.Raise()
    if not result.data:
      raise errors.OpPrereqError("One or more target bridges %s do not"
                                 " exist on destination node '%s'" %
                                 (brlist, target_node))

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.RemoteFailMsg()
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, dummy = _AssembleInstanceDisks(self, instance,
                                               ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.RemoteFailMsg()
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False
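
  # Editorial note: the "cleanup" opcode parameter selects between the two
  # entry points dispatched from Exec below -- the normal live/non-live
  # migration path (_ExecMigration) and a recovery path (_ExecCleanup) that
  # only reconciles which node actually runs the instance and re-syncs the
  # disks after a previously failed migration.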

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    brlist = [nic.bridge for nic in instance.nics]
    result = self.rpc.call_bridges_exist(target_node, brlist)
    if result.failed or not result.data:
      raise errors.OpPrereqError("One or more target bridges %s do not"
                                 " exist on destination node '%s'" %
                                 (brlist, target_node))

    if not self.op.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
                                   msg)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        msg = nres.RemoteFailMsg()
        if msg:
          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
                                   (node, msg))
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    msg = result.RemoteFailMsg()
    if msg:
      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
                               " error %s" % (node, msg))

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      msg = nres.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Cannot disconnect disks on node %s,"
                                 " error %s" % (node, msg))

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      msg = nres.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Cannot change disks config on node %s,"
                                 " error: %s" % (node, msg))

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise()
      if not isinstance(result.data, list):
        raise errors.OpExecError("Can't contact node '%s'" % node)

    runningon_source = instance.name in ins_l[source_node].data
    runningon_target = instance.name in ins_l[target_node].data

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.LogWarning("Migration failed and I can't reconnect the"
                      " drives: error '%s'\n"
                      "Please look and recover the instance status" %
                      str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.RemoteFailMsg()
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.RemoteFailMsg()
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.RemoteFailMsg()
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.op.live)
    msg = result.RemoteFailMsg()
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.RemoteFailMsg()
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
    if self.op.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
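
# Editorial note on the recursion above: children are created before their
# parent, so for a nested layout (e.g. a DRBD8 device on top of two LVs, as
# built by _GenerateDRBD8Branch below) the leaf LVs already exist by the time
# the parent device itself is created; force_create only flips to True for
# the subtree rooted at a device reporting CreateOnSecondary().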


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  msg = result.RemoteFailMsg()
  if msg:
    raise errors.OpExecError("Can't create block device %s on"
                             " node %s for instance %s: %s" %
                             (device, node, instance.name, msg))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
4249
  """Generate a suitable LV name.
4250

4251
  This will generate a logical volume name for the given instance.
4252

4253
  """
4254
  results = []
4255
  for val in exts:
4256
    new_id = lu.cfg.GenerateUniqueID()
4257
    results.append("%s%s" % (new_id, val))
4258
  return results
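
# Example (hypothetical IDs): _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# generates a fresh unique ID for each extension and returns something like
#   ["d8f1a2.disk0", "7c3e90.disk1"]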
4259

    
4260

    
4261
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
4262
                         p_minor, s_minor):
4263
  """Generate a drbd8 device complete with its children.
4264

4265
  """
4266
  port = lu.cfg.AllocatePort()
4267
  vgname = lu.cfg.GetVGName()
4268
  shared_secret = lu.cfg.GenerateDRBDSecret()
4269
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
4270
                          logical_id=(vgname, names[0]))
4271
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
4272
                          logical_id=(vgname, names[1]))
4273
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
4274
                          logical_id=(primary, secondary, port,
4275
                                      p_minor, s_minor,
4276
                                      shared_secret),
4277
                          children=[dev_data, dev_meta],
4278
                          iv_name=iv_name)
4279
  return drbd_dev
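
# Sketch of the result (hypothetical values): the returned DRBD8 disk carries
# the network and minor information in its logical_id and the two LVs as
# children, e.g.:
#   drbd_dev.logical_id == ("node1.example.com", "node2.example.com",
#                           11000, 0, 1, "<shared secret>")
#   drbd_dev.children   == [<data LV of the requested size>,
#                           <128 MB metadata LV>]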
4280

    
4281

    
4282
def _GenerateDiskTemplate(lu, template_name,
4283
                          instance_name, primary_node,
4284
                          secondary_nodes, disk_info,
4285
                          file_storage_dir, file_driver,
4286
                          base_index):
4287
  """Generate the entire disk layout for a given template type.
4288

4289
  """
4290
  #TODO: compute space requirements
4291

    
4292
  vgname = lu.cfg.GetVGName()
4293
  disk_count = len(disk_info)
4294
  disks = []
4295
  if template_name == constants.DT_DISKLESS:
4296
    pass
4297
  elif template_name == constants.DT_PLAIN:
4298
    if len(secondary_nodes) != 0:
4299
      raise errors.ProgrammerError("Wrong template configuration")
4300

    
4301
    names = _GenerateUniqueNames(lu, [".disk%d" % i
4302
                                      for i in range(disk_count)])
4303
    for idx, disk in enumerate(disk_info):
4304
      disk_index = idx + base_index
4305
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
4306
                              logical_id=(vgname, names[idx]),
4307
                              iv_name="disk/%d" % disk_index,
4308
                              mode=disk["mode"])
4309
      disks.append(disk_dev)
4310
  elif template_name == constants.DT_DRBD8:
4311
    if len(secondary_nodes) != 1:
4312
      raise errors.ProgrammerError("Wrong template configuration")
4313
    remote_node = secondary_nodes[0]
4314
    minors = lu.cfg.AllocateDRBDMinor(
4315
      [primary_node, remote_node] * len(disk_info), instance_name)
4316

    
4317
    names = []
4318
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
4319
                                               for i in range(disk_count)]):
4320
      names.append(lv_prefix + "_data")
4321
      names.append(lv_prefix + "_meta")
4322
    for idx, disk in enumerate(disk_info):
4323
      disk_index = idx + base_index
4324
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
4325
                                      disk["size"], names[idx*2:idx*2+2],
4326
                                      "disk/%d" % disk_index,
4327
                                      minors[idx*2], minors[idx*2+1])
4328
      disk_dev.mode = disk["mode"]
4329
      disks.append(disk_dev)
4330
  elif template_name == constants.DT_FILE:
4331
    if len(secondary_nodes) != 0:
4332
      raise errors.ProgrammerError("Wrong template configuration")
4333

    
4334
    for idx, disk in enumerate(disk_info):
4335
      disk_index = idx + base_index
4336
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
4337
                              iv_name="disk/%d" % disk_index,
4338
                              logical_id=(file_driver,
4339
                                          "%s/disk%d" % (file_storage_dir,
4340
                                                         disk_index)),
4341
                              mode=disk["mode"])
4342
      disks.append(disk_dev)
4343
  else:
4344
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
4345
  return disks
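
# Example input/output (hypothetical values): disk_info is a list of dicts in
# the {"size": ..., "mode": ...} format built by LUCreateInstance, e.g.
#   disk_info = [{"size": 1024, "mode": constants.DISK_RDWR}]
# For DT_PLAIN this produces a single LD_LV disk with
#   iv_name    == "disk/0"                      (with base_index == 0)
#   logical_id == (vgname, "<unique id>.disk0")
# while DT_DRBD8 wraps the equivalent data LV, plus a metadata LV, in a DRBD8
# device as built by _GenerateDRBD8Branch above.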
4346

    
4347

    
4348
def _GetInstanceInfoText(instance):
4349
  """Compute the text that should be added to the disk's metadata.
4350

4351
  """
4352
  return "originstname+%s" % instance.name
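
# E.g. for an instance named "inst1.example.com" (hypothetical name) the
# returned LVM tag text is "originstname+inst1.example.com".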
4353

    
4354

    
4355
def _CreateDisks(lu, instance):
4356
  """Create all disks for an instance.
4357

4358
  This abstracts away some work from AddInstance.
4359

4360
  @type lu: L{LogicalUnit}
4361
  @param lu: the logical unit on whose behalf we execute
4362
  @type instance: L{objects.Instance}
4363
  @param instance: the instance whose disks we should create
4364
  @raise errors.OpExecError: in case of an error
4365
4366

4367
  """
4368
  info = _GetInstanceInfoText(instance)
4369
  pnode = instance.primary_node
4370

    
4371
  if instance.disk_template == constants.DT_FILE:
4372
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4373
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
4374

    
4375
    if result.failed or not result.data:
4376
      raise errors.OpExecError("Could not connect to node '%s'" % pnode)
4377

    
4378
    if not result.data[0]:
4379
      raise errors.OpExecError("Failed to create directory '%s'" %
4380
                               file_storage_dir)
4381

    
4382
  # Note: this needs to be kept in sync with adding of disks in
4383
  # LUSetInstanceParams
4384
  for device in instance.disks:
4385
    logging.info("Creating volume %s for instance %s",
4386
                 device.iv_name, instance.name)
4387
    #HARDCODE
4388
    for node in instance.all_nodes:
4389
      f_create = node == pnode
4390
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
4391

    
4392

    
4393
def _RemoveDisks(lu, instance):
4394
  """Remove all disks for an instance.
4395

4396
  This abstracts away some work from `AddInstance()` and
4397
  `RemoveInstance()`. Note that in case some of the devices couldn't
4398
  be removed, the removal will continue with the other ones (compare
4399
  with `_CreateDisks()`).
4400

4401
  @type lu: L{LogicalUnit}
4402
  @param lu: the logical unit on whose behalf we execute
4403
  @type instance: L{objects.Instance}
4404
  @param instance: the instance whose disks we should remove
4405
  @rtype: boolean
4406
  @return: the success of the removal
4407

4408
  """
4409
  logging.info("Removing block devices for instance %s", instance.name)
4410

    
4411
  all_result = True
4412
  for device in instance.disks:
4413
    for node, disk in device.ComputeNodeTree(instance.primary_node):
4414
      lu.cfg.SetDiskID(disk, node)
4415
      msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
4416
      if msg:
4417
        lu.LogWarning("Could not remove block device %s on node %s,"
4418
                      " continuing anyway: %s", device.iv_name, node, msg)
4419
        all_result = False
4420

    
4421
  if instance.disk_template == constants.DT_FILE:
4422
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4423
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
4424
                                                 file_storage_dir)
4425
    if result.failed or not result.data:
4426
      logging.error("Could not remove directory '%s'", file_storage_dir)
4427
      all_result = False
4428

    
4429
  return all_result
4430

    
4431

    
4432
def _ComputeDiskSize(disk_template, disks):
4433
  """Compute disk size requirements in the volume group
4434

4435
  """
4436
  # Required free disk space as a function of disk and swap space
4437
  req_size_dict = {
4438
    constants.DT_DISKLESS: None,
4439
    constants.DT_PLAIN: sum(d["size"] for d in disks),
4440
    # 128 MB are added for drbd metadata for each disk
4441
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
4442
    constants.DT_FILE: None,
4443
  }
4444

    
4445
  if disk_template not in req_size_dict:
4446
    raise errors.ProgrammerError("Disk template '%s' size requirement"
4447
                                 " is unknown" %  disk_template)
4448

    
4449
  return req_size_dict[disk_template]
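
# Worked example (hypothetical sizes): for two disks of 1024 MB and 512 MB
# this returns
#   DT_PLAIN:              1024 + 512                 = 1536 MB
#   DT_DRBD8:              (1024 + 128) + (512 + 128) = 1792 MB
#   DT_FILE / DT_DISKLESS: None (no space needed in the volume group)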
4450

    
4451

    
4452
def _CheckHVParams(lu, nodenames, hvname, hvparams):
4453
  """Hypervisor parameter validation.
4454

4455
  This function abstracts the hypervisor parameter validation to be
4456
  used in both instance create and instance modify.
4457

4458
  @type lu: L{LogicalUnit}
4459
  @param lu: the logical unit for which we check
4460
  @type nodenames: list
4461
  @param nodenames: the list of nodes on which we should check
4462
  @type hvname: string
4463
  @param hvname: the name of the hypervisor we should use
4464
  @type hvparams: dict
4465
  @param hvparams: the parameters which we need to check
4466
  @raise errors.OpPrereqError: if the parameters are not valid
4467

4468
  """
4469
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
4470
                                                  hvname,
4471
                                                  hvparams)
4472
  for node in nodenames:
4473
    info = hvinfo[node]
4474
    if info.offline:
4475
      continue
4476
    msg = info.RemoteFailMsg()
4477
    if msg:
4478
      raise errors.OpPrereqError("Hypervisor parameter validation"
4479
                                 " failed on node %s: %s" % (node, msg))
4480

    
4481

    
4482
class LUCreateInstance(LogicalUnit):
4483
  """Create an instance.
4484

4485
  """
4486
  HPATH = "instance-add"
4487
  HTYPE = constants.HTYPE_INSTANCE
4488
  _OP_REQP = ["instance_name", "disks", "disk_template",
4489
              "mode", "start",
4490
              "wait_for_sync", "ip_check", "nics",
4491
              "hvparams", "beparams"]
4492
  REQ_BGL = False
4493

    
4494
  def _ExpandNode(self, node):
4495
    """Expands and checks one node name.
4496

4497
    """
4498
    node_full = self.cfg.ExpandNodeName(node)
4499
    if node_full is None:
4500
      raise errors.OpPrereqError("Unknown node %s" % node)
4501
    return node_full
4502

    
4503
  def ExpandNames(self):
4504
    """ExpandNames for CreateInstance.
4505

4506
    Figure out the right locks for instance creation.
4507

4508
    """
4509
    self.needed_locks = {}
4510

    
4511
    # set optional parameters to none if they don't exist
4512
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4513
      if not hasattr(self.op, attr):
4514
        setattr(self.op, attr, None)
4515

    
4516
    # cheap checks, mostly valid constants given
4517

    
4518
    # verify creation mode
4519
    if self.op.mode not in (constants.INSTANCE_CREATE,
4520
                            constants.INSTANCE_IMPORT):
4521
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4522
                                 self.op.mode)
4523

    
4524
    # disk template and mirror node verification
4525
    if self.op.disk_template not in constants.DISK_TEMPLATES:
4526
      raise errors.OpPrereqError("Invalid disk template name")
4527

    
4528
    if self.op.hypervisor is None:
4529
      self.op.hypervisor = self.cfg.GetHypervisorType()
4530

    
4531
    cluster = self.cfg.GetClusterInfo()
4532
    enabled_hvs = cluster.enabled_hypervisors
4533
    if self.op.hypervisor not in enabled_hvs:
4534
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4535
                                 " cluster (%s)" % (self.op.hypervisor,
4536
                                  ",".join(enabled_hvs)))
4537

    
4538
    # check hypervisor parameter syntax (locally)
4539
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4540
    filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
4541
                                  self.op.hvparams)
4542
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4543
    hv_type.CheckParameterSyntax(filled_hvp)
4544
    self.hv_full = filled_hvp
4545

    
4546
    # fill and remember the beparams dict
4547
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4548
    self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
4549
                                    self.op.beparams)
4550

    
4551
    #### instance parameters check
4552

    
4553
    # instance name verification
4554
    hostname1 = utils.HostInfo(self.op.instance_name)
4555
    self.op.instance_name = instance_name = hostname1.name
4556

    
4557
    # this is just a preventive check, but someone might still add this
4558
    # instance in the meantime, and creation will fail at lock-add time
4559
    if instance_name in self.cfg.GetInstanceList():
4560
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4561
                                 instance_name)
4562

    
4563
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4564

    
4565
    # NIC buildup
4566
    self.nics = []
4567
    for nic in self.op.nics:
4568
      # ip validity checks
4569
      ip = nic.get("ip", None)
4570
      if ip is None or ip.lower() == "none":
4571
        nic_ip = None
4572
      elif ip.lower() == constants.VALUE_AUTO:
4573
        nic_ip = hostname1.ip
4574
      else:
4575
        if not utils.IsValidIP(ip):
4576
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4577
                                     " like a valid IP" % ip)
4578
        nic_ip = ip
4579

    
4580
      # MAC address verification
4581
      mac = nic.get("mac", constants.VALUE_AUTO)
4582
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4583
        if not utils.IsValidMac(mac.lower()):
4584
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4585
                                     mac)
4586
      # bridge verification
4587
      bridge = nic.get("bridge", None)
4588
      if bridge is None:
4589
        bridge = self.cfg.GetDefBridge()
4590
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
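
    # Example NIC specification (hypothetical values): an entry of
    #   {"ip": "auto", "mac": "auto", "bridge": None}
    # resolves to the instance's own IP, a MAC address generated later in
    # CheckPrereq, and the cluster's default bridge.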
4591

    
4592
    # disk checks/pre-build
4593
    self.disks = []
4594
    for disk in self.op.disks:
4595
      mode = disk.get("mode", constants.DISK_RDWR)
4596
      if mode not in constants.DISK_ACCESS_SET:
4597
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4598
                                   mode)
4599
      size = disk.get("size", None)
4600
      if size is None:
4601
        raise errors.OpPrereqError("Missing disk size")
4602
      try:
4603
        size = int(size)
4604
      except ValueError:
4605
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4606
      self.disks.append({"size": size, "mode": mode})
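
    # Example disk specification (hypothetical values): an entry of
    #   {"size": "1024", "mode": "rw"}
    # is normalized to {"size": 1024, "mode": constants.DISK_RDWR}; a missing
    # or non-integer size is rejected above.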
4607

    
4608
    # used in CheckPrereq for ip ping check
4609
    self.check_ip = hostname1.ip
4610

    
4611
    # file storage checks
4612
    if (self.op.file_driver and
4613
        not self.op.file_driver in constants.FILE_DRIVER):
4614
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
4615
                                 self.op.file_driver)
4616

    
4617
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4618
      raise errors.OpPrereqError("File storage directory path not absolute")
4619

    
4620
    ### Node/iallocator related checks
4621
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
4622
      raise errors.OpPrereqError("One and only one of iallocator and primary"
4623
                                 " node must be given")
4624

    
4625
    if self.op.iallocator:
4626
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4627
    else:
4628
      self.op.pnode = self._ExpandNode(self.op.pnode)
4629
      nodelist = [self.op.pnode]
4630
      if self.op.snode is not None:
4631
        self.op.snode = self._ExpandNode(self.op.snode)
4632
        nodelist.append(self.op.snode)
4633
      self.needed_locks[locking.LEVEL_NODE] = nodelist
4634

    
4635
    # in case of import lock the source node too
4636
    if self.op.mode == constants.INSTANCE_IMPORT:
4637
      src_node = getattr(self.op, "src_node", None)
4638
      src_path = getattr(self.op, "src_path", None)
4639

    
4640
      if src_path is None:
4641
        self.op.src_path = src_path = self.op.instance_name
4642

    
4643
      if src_node is None:
4644
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4645
        self.op.src_node = None
4646
        if os.path.isabs(src_path):
4647
          raise errors.OpPrereqError("Importing an instance from an absolute"
4648
                                     " path requires a source node option.")
4649
      else:
4650
        self.op.src_node = src_node = self._ExpandNode(src_node)
4651
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4652
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
4653
        if not os.path.isabs(src_path):
4654
          self.op.src_path = src_path = \
4655
            os.path.join(constants.EXPORT_DIR, src_path)
4656

    
4657
    else: # INSTANCE_CREATE
4658
      if getattr(self.op, "os_type", None) is None:
4659
        raise errors.OpPrereqError("No guest OS specified")
4660

    
4661
  def _RunAllocator(self):
4662
    """Run the allocator based on input opcode.
4663

4664
    """
4665
    nics = [n.ToDict() for n in self.nics]
4666
    ial = IAllocator(self,
4667
                     mode=constants.IALLOCATOR_MODE_ALLOC,
4668
                     name=self.op.instance_name,
4669
                     disk_template=self.op.disk_template,
4670
                     tags=[],
4671
                     os=self.op.os_type,
4672
                     vcpus=self.be_full[constants.BE_VCPUS],
4673
                     mem_size=self.be_full[constants.BE_MEMORY],
4674
                     disks=self.disks,
4675
                     nics=nics,
4676
                     hypervisor=self.op.hypervisor,
4677
                     )
4678

    
4679
    ial.Run(self.op.iallocator)
4680

    
4681
    if not ial.success:
4682
      raise errors.OpPrereqError("Can't compute nodes using"
4683
                                 " iallocator '%s': %s" % (self.op.iallocator,
4684
                                                           ial.info))
4685
    if len(ial.nodes) != ial.required_nodes:
4686
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4687
                                 " of nodes (%s), required %s" %
4688
                                 (self.op.iallocator, len(ial.nodes),
4689
                                  ial.required_nodes))
4690
    self.op.pnode = ial.nodes[0]
4691
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4692
                 self.op.instance_name, self.op.iallocator,
4693
                 ", ".join(ial.nodes))
4694
    if ial.required_nodes == 2:
4695
      self.op.snode = ial.nodes[1]
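
    # Example result (hypothetical node names): for a DRBD8 instance the
    # allocator returns two nodes, e.g.
    #   ial.nodes == ["node3.example.com", "node7.example.com"]
    # where the first becomes the primary and the second the DRBD secondary.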
4696

    
4697
  def BuildHooksEnv(self):
4698
    """Build hooks env.
4699

4700
    This runs on master, primary and secondary nodes of the instance.
4701

4702
    """
4703
    env = {
4704
      "ADD_MODE": self.op.mode,
4705
      }
4706
    if self.op.mode == constants.INSTANCE_IMPORT:
4707
      env["SRC_NODE"] = self.op.src_node
4708
      env["SRC_PATH"] = self.op.src_path
4709
      env["SRC_IMAGES"] = self.src_images
4710

    
4711
    env.update(_BuildInstanceHookEnv(
4712
      name=self.op.instance_name,
4713
      primary_node=self.op.pnode,
4714
      secondary_nodes=self.secondaries,
4715
      status=self.op.start,
4716
      os_type=self.op.os_type,
4717
      memory=self.be_full[constants.BE_MEMORY],
4718
      vcpus=self.be_full[constants.BE_VCPUS],
4719
      nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
4720
      disk_template=self.op.disk_template,
4721
      disks=[(d["size"], d["mode"]) for d in self.disks],
4722
      bep=self.be_full,
4723
      hvp=self.hv_full,
4724
      hypervisor=self.op.hypervisor,
4725
    ))
4726

    
4727
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
4728
          self.secondaries)
4729
    return env, nl, nl
4730

    
4731

    
4732
  def CheckPrereq(self):
4733
    """Check prerequisites.
4734

4735
    """
4736
    if (not self.cfg.GetVGName() and
4737
        self.op.disk_template not in constants.DTS_NOT_LVM):
4738
      raise errors.OpPrereqError("Cluster does not support lvm-based"
4739
                                 " instances")
4740

    
4741
    if self.op.mode == constants.INSTANCE_IMPORT:
4742
      src_node = self.op.src_node
4743
      src_path = self.op.src_path
4744

    
4745
      if src_node is None:
4746
        exp_list = self.rpc.call_export_list(
4747
          self.acquired_locks[locking.LEVEL_NODE])
4748
        found = False
4749
        for node in exp_list:
4750
          if not exp_list[node].failed and src_path in exp_list[node].data:
4751
            found = True
4752
            self.op.src_node = src_node = node
4753
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
4754
                                                       src_path)
4755
            break
4756
        if not found:
4757
          raise errors.OpPrereqError("No export found for relative path %s" %
4758
                                      src_path)
4759

    
4760
      _CheckNodeOnline(self, src_node)
4761
      result = self.rpc.call_export_info(src_node, src_path)
4762
      result.Raise()
4763
      if not result.data:
4764
        raise errors.OpPrereqError("No export found in dir %s" % src_path)
4765

    
4766
      export_info = result.data
4767
      if not export_info.has_section(constants.INISECT_EXP):
4768
        raise errors.ProgrammerError("Corrupted export config")
4769

    
4770
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
4771
      if (int(ei_version) != constants.EXPORT_VERSION):
4772
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
4773
                                   (ei_version, constants.EXPORT_VERSION))
4774

    
4775
      # Check that the new instance doesn't have less disks than the export
4776
      instance_disks = len(self.disks)
4777
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
4778
      if instance_disks < export_disks:
4779
        raise errors.OpPrereqError("Not enough disks to import."
4780
                                   " (instance: %d, export: %d)" %
4781
                                   (instance_disks, export_disks))
4782

    
4783
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
4784
      disk_images = []
4785
      for idx in range(export_disks):
4786
        option = 'disk%d_dump' % idx
4787
        if export_info.has_option(constants.INISECT_INS, option):
4788
          # FIXME: are the old os-es, disk sizes, etc. useful?
4789
          export_name = export_info.get(constants.INISECT_INS, option)
4790
          image = os.path.join(src_path, export_name)
4791
          disk_images.append(image)
4792
        else:
4793
          disk_images.append(False)
4794

    
4795
      self.src_images = disk_images
4796

    
4797
      old_name = export_info.get(constants.INISECT_INS, 'name')
4798
      # FIXME: int() here could throw a ValueError on broken exports
4799
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
4800
      if self.op.instance_name == old_name:
4801
        for idx, nic in enumerate(self.nics):
4802
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
4803
            nic_mac_ini = 'nic%d_mac' % idx
4804
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
4805

    
4806
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
4807
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
4808
    if self.op.start and not self.op.ip_check:
4809
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
4810
                                 " adding an instance in start mode")
4811

    
4812
    if self.op.ip_check:
4813
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
4814
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4815
                                   (self.check_ip, self.op.instance_name))
4816

    
4817
    #### mac address generation
4818
    # By generating here the mac address both the allocator and the hooks get
4819
    # the real final mac address rather than the 'auto' or 'generate' value.
4820
    # There is a race condition between the generation and the instance object
4821
    # creation, which means that we know the mac is valid now, but we're not
4822
    # sure it will be when we actually add the instance. If things go bad
4823
    # adding the instance will abort because of a duplicate mac, and the
4824
    # creation job will fail.
4825
    for nic in self.nics:
4826
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4827
        nic.mac = self.cfg.GenerateMAC()
4828

    
4829
    #### allocator run
4830

    
4831
    if self.op.iallocator is not None:
4832
      self._RunAllocator()
4833

    
4834
    #### node related checks
4835

    
4836
    # check primary node
4837
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
4838
    assert self.pnode is not None, \
4839
      "Cannot retrieve locked node %s" % self.op.pnode
4840
    if pnode.offline:
4841
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
4842
                                 pnode.name)
4843
    if pnode.drained:
4844
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
4845
                                 pnode.name)
4846

    
4847
    self.secondaries = []
4848

    
4849
    # mirror node verification
4850
    if self.op.disk_template in constants.DTS_NET_MIRROR:
4851
      if self.op.snode is None:
4852
        raise errors.OpPrereqError("The networked disk templates need"
4853
                                   " a mirror node")
4854
      if self.op.snode == pnode.name:
4855
        raise errors.OpPrereqError("The secondary node cannot be"
4856
                                   " the primary node.")
4857
      _CheckNodeOnline(self, self.op.snode)
4858
      _CheckNodeNotDrained(self, self.op.snode)
4859
      self.secondaries.append(self.op.snode)
4860

    
4861
    nodenames = [pnode.name] + self.secondaries
4862

    
4863
    req_size = _ComputeDiskSize(self.op.disk_template,
4864
                                self.disks)
4865

    
4866
    # Check lv size requirements
4867
    if req_size is not None:
4868
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
4869
                                         self.op.hypervisor)
4870
      for node in nodenames:
4871
        info = nodeinfo[node]
4872
        info.Raise()
4873
        info = info.data
4874
        if not info:
4875
          raise errors.OpPrereqError("Cannot get current information"
4876
                                     " from node '%s'" % node)
4877
        vg_free = info.get('vg_free', None)
4878
        if not isinstance(vg_free, int):
4879
          raise errors.OpPrereqError("Can't compute free disk space on"
4880
                                     " node %s" % node)
4881
        if req_size > info['vg_free']:
4882
          raise errors.OpPrereqError("Not enough disk space on target node %s."
4883
                                     " %d MB available, %d MB required" %
4884
                                     (node, info['vg_free'], req_size))
4885

    
4886
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
4887

    
4888
    # os verification
4889
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
4890
    result.Raise()
4891
    if not isinstance(result.data, objects.OS) or not result.data:
4892
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
4893
                                 " primary node"  % self.op.os_type)
4894

    
4895
    # bridge check on primary node
4896
    bridges = [n.bridge for n in self.nics]
4897
    result = self.rpc.call_bridges_exist(self.pnode.name, bridges)
4898
    result.Raise()
4899
    if not result.data:
4900
      raise errors.OpPrereqError("One of the target bridges '%s' does not"
4901
                                 " exist on destination node '%s'" %
4902
                                 (",".join(bridges), pnode.name))
4903

    
4904
    # memory check on primary node
4905
    if self.op.start:
4906
      _CheckNodeFreeMemory(self, self.pnode.name,
4907
                           "creating instance %s" % self.op.instance_name,
4908
                           self.be_full[constants.BE_MEMORY],
4909
                           self.op.hypervisor)
4910

    
4911
  def Exec(self, feedback_fn):
4912
    """Create and add the instance to the cluster.
4913

4914
    """
4915
    instance = self.op.instance_name
4916
    pnode_name = self.pnode.name
4917

    
4918
    ht_kind = self.op.hypervisor
4919
    if ht_kind in constants.HTS_REQ_PORT:
4920
      network_port = self.cfg.AllocatePort()
4921
    else:
4922
      network_port = None
4923

    
4924
    ##if self.op.vnc_bind_address is None:
4925
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
4926

    
4927
    # this is needed because os.path.join does not accept None arguments
4928
    if self.op.file_storage_dir is None:
4929
      string_file_storage_dir = ""
4930
    else:
4931
      string_file_storage_dir = self.op.file_storage_dir
4932

    
4933
    # build the full file storage dir path
4934
    file_storage_dir = os.path.normpath(os.path.join(
4935
                                        self.cfg.GetFileStorageDir(),
4936
                                        string_file_storage_dir, instance))
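
    # Example (hypothetical paths): with a cluster file storage dir of
    # /srv/ganeti/file-storage, no per-instance subdirectory and an instance
    # named inst1.example.com, this results in
    # /srv/ganeti/file-storage/inst1.example.com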
4937

    
4938

    
4939
    disks = _GenerateDiskTemplate(self,
4940
                                  self.op.disk_template,
4941
                                  instance, pnode_name,
4942
                                  self.secondaries,
4943
                                  self.disks,
4944
                                  file_storage_dir,
4945
                                  self.op.file_driver,
4946
                                  0)
4947

    
4948
    iobj = objects.Instance(name=instance, os=self.op.os_type,
4949
                            primary_node=pnode_name,
4950
                            nics=self.nics, disks=disks,
4951
                            disk_template=self.op.disk_template,
4952
                            admin_up=False,
4953
                            network_port=network_port,
4954
                            beparams=self.op.beparams,
4955
                            hvparams=self.op.hvparams,
4956
                            hypervisor=self.op.hypervisor,
4957
                            )
4958

    
4959
    feedback_fn("* creating instance disks...")
4960
    try:
4961
      _CreateDisks(self, iobj)
4962
    except errors.OpExecError:
4963
      self.LogWarning("Device creation failed, reverting...")
4964
      try:
4965
        _RemoveDisks(self, iobj)
4966
      finally:
4967
        self.cfg.ReleaseDRBDMinors(instance)
4968
        raise
4969

    
4970
    feedback_fn("adding instance %s to cluster config" % instance)
4971

    
4972
    self.cfg.AddInstance(iobj)
4973
    # Declare that we don't want to remove the instance lock anymore, as we've
4974
    # added the instance to the config
4975
    del self.remove_locks[locking.LEVEL_INSTANCE]
4976
    # Unlock all the nodes
4977
    if self.op.mode == constants.INSTANCE_IMPORT:
4978
      nodes_keep = [self.op.src_node]
4979
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
4980
                       if node != self.op.src_node]
4981
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
4982
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
4983
    else:
4984
      self.context.glm.release(locking.LEVEL_NODE)
4985
      del self.acquired_locks[locking.LEVEL_NODE]
4986

    
4987
    if self.op.wait_for_sync:
4988
      disk_abort = not _WaitForSync(self, iobj)
4989
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
4990
      # make sure the disks are not degraded (still sync-ing is ok)
4991
      time.sleep(15)
4992
      feedback_fn("* checking mirrors status")
4993
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
4994
    else:
4995
      disk_abort = False
4996

    
4997
    if disk_abort:
4998
      _RemoveDisks(self, iobj)
4999
      self.cfg.RemoveInstance(iobj.name)
5000
      # Make sure the instance lock gets removed
5001
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5002
      raise errors.OpExecError("There are some degraded disks for"
5003
                               " this instance")
5004

    
5005
    feedback_fn("creating os for instance %s on node %s" %
5006
                (instance, pnode_name))
5007

    
5008
    if iobj.disk_template != constants.DT_DISKLESS:
5009
      if self.op.mode == constants.INSTANCE_CREATE:
5010
        feedback_fn("* running the instance OS create scripts...")
5011
        result = self.rpc.call_instance_os_add(pnode_name, iobj)
5012
        msg = result.RemoteFailMsg()
5013
        if msg:
5014
          raise errors.OpExecError("Could not add os for instance %s"
5015
                                   " on node %s: %s" %
5016
                                   (instance, pnode_name, msg))
5017

    
5018
      elif self.op.mode == constants.INSTANCE_IMPORT:
5019
        feedback_fn("* running the instance OS import scripts...")
5020
        src_node = self.op.src_node
5021
        src_images = self.src_images
5022
        cluster_name = self.cfg.GetClusterName()
5023
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5024
                                                         src_node, src_images,
5025
                                                         cluster_name)
5026
        import_result.Raise()
5027
        for idx, result in enumerate(import_result.data):
5028
          if not result:
5029
            self.LogWarning("Could not import the image %s for instance"
5030
                            " %s, disk %d, on node %s" %
5031
                            (src_images[idx], instance, idx, pnode_name))
5032
      else:
5033
        # also checked in the prereq part
5034
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5035
                                     % self.op.mode)
5036

    
5037
    if self.op.start:
5038
      iobj.admin_up = True
5039
      self.cfg.Update(iobj)
5040
      logging.info("Starting instance %s on node %s", instance, pnode_name)
5041
      feedback_fn("* starting instance...")
5042
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5043
      msg = result.RemoteFailMsg()
5044
      if msg:
5045
        raise errors.OpExecError("Could not start instance: %s" % msg)
5046

    
5047

    
5048
class LUConnectConsole(NoHooksLU):
5049
  """Connect to an instance's console.
5050

5051
  This is somewhat special in that it returns the command line that
5052
  you need to run on the master node in order to connect to the
5053
  console.
5054

5055
  """
5056
  _OP_REQP = ["instance_name"]
5057
  REQ_BGL = False
5058

    
5059
  def ExpandNames(self):
5060
    self._ExpandAndLockInstance()
5061

    
5062
  def CheckPrereq(self):
5063
    """Check prerequisites.
5064

5065
    This checks that the instance is in the cluster.
5066

5067
    """
5068
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5069
    assert self.instance is not None, \
5070
      "Cannot retrieve locked instance %s" % self.op.instance_name
5071
    _CheckNodeOnline(self, self.instance.primary_node)
5072

    
5073
  def Exec(self, feedback_fn):
5074
    """Connect to the console of an instance
5075

5076
    """
5077
    instance = self.instance
5078
    node = instance.primary_node
5079

    
5080
    node_insts = self.rpc.call_instance_list([node],
5081
                                             [instance.hypervisor])[node]
5082
    node_insts.Raise()
5083

    
5084
    if instance.name not in node_insts.data:
5085
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5086

    
5087
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5088

    
5089
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5090
    cluster = self.cfg.GetClusterInfo()
5091
    # beparams and hvparams are passed separately, to avoid editing the
5092
    # instance and then saving the defaults in the instance itself.
5093
    hvparams = cluster.FillHV(instance)
5094
    beparams = cluster.FillBE(instance)
5095
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5096

    
5097
    # build ssh cmdline
5098
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
5099

    
5100

    
5101
class LUReplaceDisks(LogicalUnit):
5102
  """Replace the disks of an instance.
5103

5104
  """
5105
  HPATH = "mirrors-replace"
5106
  HTYPE = constants.HTYPE_INSTANCE
5107
  _OP_REQP = ["instance_name", "mode", "disks"]
5108
  REQ_BGL = False
5109

    
5110
  def CheckArguments(self):
5111
    if not hasattr(self.op, "remote_node"):
5112
      self.op.remote_node = None
5113
    if not hasattr(self.op, "iallocator"):
5114
      self.op.iallocator = None
5115

    
5116
    # check for valid parameter combination
5117
    cnt = [self.op.remote_node, self.op.iallocator].count(None)
5118
    if self.op.mode == constants.REPLACE_DISK_CHG:
5119
      if cnt == 2:
5120
        raise errors.OpPrereqError("When changing the secondary either an"
5121
                                   " iallocator script must be used or the"
5122
                                   " new node given")
5123
      elif cnt == 0:
5124
        raise errors.OpPrereqError("Give either the iallocator or the new"
5125
                                   " secondary, not both")
5126
    else: # not replacing the secondary
5127
      if cnt != 2:
5128
        raise errors.OpPrereqError("The iallocator and new node options can"
5129
                                   " be used only when changing the"
5130
                                   " secondary node")
5131

    
5132
  def ExpandNames(self):
5133
    self._ExpandAndLockInstance()
5134

    
5135
    if self.op.iallocator is not None:
5136
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5137
    elif self.op.remote_node is not None:
5138
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5139
      if remote_node is None:
5140
        raise errors.OpPrereqError("Node '%s' not known" %
5141
                                   self.op.remote_node)
5142
      self.op.remote_node = remote_node
5143
      # Warning: do not remove the locking of the new secondary here
5144
      # unless DRBD8.AddChildren is changed to work in parallel;
5145
      # currently it doesn't since parallel invocations of
5146
      # FindUnusedMinor will conflict
5147
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5148
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5149
    else:
5150
      self.needed_locks[locking.LEVEL_NODE] = []
5151
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5152

    
5153
  def DeclareLocks(self, level):
5154
    # If we're not already locking all nodes in the set we have to declare the
5155
    # instance's primary/secondary nodes.
5156
    if (level == locking.LEVEL_NODE and
5157
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5158
      self._LockInstancesNodes()
5159

    
5160
  def _RunAllocator(self):
5161
    """Compute a new secondary node using an IAllocator.
5162

5163
    """
5164
    ial = IAllocator(self,
5165
                     mode=constants.IALLOCATOR_MODE_RELOC,
5166
                     name=self.op.instance_name,
5167
                     relocate_from=[self.sec_node])
5168

    
5169
    ial.Run(self.op.iallocator)
5170

    
5171
    if not ial.success:
5172
      raise errors.OpPrereqError("Can't compute nodes using"
5173
                                 " iallocator '%s': %s" % (self.op.iallocator,
5174
                                                           ial.info))
5175
    if len(ial.nodes) != ial.required_nodes:
5176
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5177
                                 " of nodes (%s), required %s" %
5178
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
5179
    self.op.remote_node = ial.nodes[0]
5180
    self.LogInfo("Selected new secondary for the instance: %s",
5181
                 self.op.remote_node)
5182

    
5183
  def BuildHooksEnv(self):
5184
    """Build hooks env.
5185

5186
    This runs on the master, the primary and all the secondaries.
5187

5188
    """
5189
    env = {
5190
      "MODE": self.op.mode,
5191
      "NEW_SECONDARY": self.op.remote_node,
5192
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
5193
      }
5194
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5195
    nl = [
5196
      self.cfg.GetMasterNode(),
5197
      self.instance.primary_node,
5198
      ]
5199
    if self.op.remote_node is not None:
5200
      nl.append(self.op.remote_node)
5201
    return env, nl, nl
5202

    
5203
  def CheckPrereq(self):
5204
    """Check prerequisites.
5205

5206
    This checks that the instance is in the cluster.
5207

5208
    """
5209
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5210
    assert instance is not None, \
5211
      "Cannot retrieve locked instance %s" % self.op.instance_name
5212
    self.instance = instance
5213

    
5214
    if instance.disk_template != constants.DT_DRBD8:
5215
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5216
                                 " instances")
5217

    
5218
    if len(instance.secondary_nodes) != 1:
5219
      raise errors.OpPrereqError("The instance has a strange layout,"
5220
                                 " expected one secondary but found %d" %
5221
                                 len(instance.secondary_nodes))
5222

    
5223
    self.sec_node = instance.secondary_nodes[0]
5224

    
5225
    if self.op.iallocator is not None:
5226
      self._RunAllocator()
5227

    
5228
    remote_node = self.op.remote_node
5229
    if remote_node is not None:
5230
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5231
      assert self.remote_node_info is not None, \
5232
        "Cannot retrieve locked node %s" % remote_node
5233
    else:
5234
      self.remote_node_info = None
5235
    if remote_node == instance.primary_node:
5236
      raise errors.OpPrereqError("The specified node is the primary node of"
5237
                                 " the instance.")
5238
    elif remote_node == self.sec_node:
5239
      raise errors.OpPrereqError("The specified node is already the"
5240
                                 " secondary node of the instance.")
5241

    
5242
    if self.op.mode == constants.REPLACE_DISK_PRI:
5243
      n1 = self.tgt_node = instance.primary_node
5244
      n2 = self.oth_node = self.sec_node
5245
    elif self.op.mode == constants.REPLACE_DISK_SEC:
5246
      n1 = self.tgt_node = self.sec_node
5247
      n2 = self.oth_node = instance.primary_node
5248
    elif self.op.mode == constants.REPLACE_DISK_CHG:
5249
      n1 = self.new_node = remote_node
5250
      n2 = self.oth_node = instance.primary_node
5251
      self.tgt_node = self.sec_node
5252
      _CheckNodeNotDrained(self, remote_node)
5253
    else:
5254
      raise errors.ProgrammerError("Unhandled disk replace mode")
5255

    
5256
    _CheckNodeOnline(self, n1)
5257
    _CheckNodeOnline(self, n2)
5258

    
5259
    if not self.op.disks:
5260
      self.op.disks = range(len(instance.disks))
5261

    
5262
    for disk_idx in self.op.disks:
5263
      instance.FindDisk(disk_idx)
5264

    
5265
  def _ExecD8DiskOnly(self, feedback_fn):
5266
    """Replace a disk on the primary or secondary for drbd8.
5267

5268
    The algorithm for replace is quite complicated:
5269

5270
      1. for each disk to be replaced:
5271

5272
        1. create new LVs on the target node with unique names
5273
        1. detach old LVs from the drbd device
5274
        1. rename old LVs to name_replaced.<time_t>
5275
        1. rename new LVs to old LVs
5276
        1. attach the new LVs (with the old names now) to the drbd device
5277

5278
      1. wait for sync across all devices
5279

5280
      1. for each modified disk:
5281

5282
        1. remove old LVs (which have the name name_replaces.<time_t>)
5283

5284
    Failures are not very well handled.
5285

5286
    """
5287
    steps_total = 6
5288
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5289
    instance = self.instance
5290
    iv_names = {}
5291
    vgname = self.cfg.GetVGName()
5292
    # start of work
5293
    cfg = self.cfg
5294
    tgt_node = self.tgt_node
5295
    oth_node = self.oth_node
5296

    
5297
    # Step: check device activation
5298
    self.proc.LogStep(1, steps_total, "check device existence")
5299
    info("checking volume groups")
5300
    my_vg = cfg.GetVGName()
5301
    results = self.rpc.call_vg_list([oth_node, tgt_node])
5302
    if not results:
5303
      raise errors.OpExecError("Can't list volume groups on the nodes")
5304
    for node in oth_node, tgt_node:
5305
      res = results[node]
5306
      if res.failed or not res.data or my_vg not in res.data:
5307
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5308
                                 (my_vg, node))
5309
    for idx, dev in enumerate(instance.disks):
5310
      if idx not in self.op.disks:
5311
        continue
5312
      for node in tgt_node, oth_node:
5313
        info("checking disk/%d on %s" % (idx, node))
5314
        cfg.SetDiskID(dev, node)
5315
        result = self.rpc.call_blockdev_find(node, dev)
5316
        msg = result.RemoteFailMsg()
5317
        if not msg and not result.payload:
5318
          msg = "disk not found"
5319
        if msg:
5320
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5321
                                   (idx, node, msg))
5322

    
5323
    # Step: check other node consistency
5324
    self.proc.LogStep(2, steps_total, "check peer consistency")
5325
    for idx, dev in enumerate(instance.disks):
5326
      if idx not in self.op.disks:
5327
        continue
5328
      info("checking disk/%d consistency on %s" % (idx, oth_node))
5329
      if not _CheckDiskConsistency(self, dev, oth_node,
5330
                                   oth_node==instance.primary_node):
5331
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
5332
                                 " to replace disks on this node (%s)" %
5333
                                 (oth_node, tgt_node))
5334

    
5335
    # Step: create new storage
5336
    self.proc.LogStep(3, steps_total, "allocate new storage")
5337
    for idx, dev in enumerate(instance.disks):
5338
      if idx not in self.op.disks:
5339
        continue
5340
      size = dev.size
5341
      cfg.SetDiskID(dev, tgt_node)
5342
      lv_names = [".disk%d_%s" % (idx, suf)
5343
                  for suf in ["data", "meta"]]
5344
      names = _GenerateUniqueNames(self, lv_names)
5345
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5346
                             logical_id=(vgname, names[0]))
5347
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5348
                             logical_id=(vgname, names[1]))
5349
      new_lvs = [lv_data, lv_meta]
5350
      old_lvs = dev.children
5351
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5352
      info("creating new local storage on %s for %s" %
5353
           (tgt_node, dev.iv_name))
5354
      # we pass force_create=True to force the LVM creation
5355
      for new_lv in new_lvs:
5356
        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
5357
                        _GetInstanceInfoText(instance), False)
5358

    
5359
    # Step: for each lv, detach+rename*2+attach
5360
    self.proc.LogStep(4, steps_total, "change drbd configuration")
5361
    for dev, old_lvs, new_lvs in iv_names.itervalues():
5362
      info("detaching %s drbd from local storage" % dev.iv_name)
5363
      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
5364
      result.Raise()
5365
      if not result.data:
5366
        raise errors.OpExecError("Can't detach drbd from local storage on node"
5367
                                 " %s for device %s" % (tgt_node, dev.iv_name))
5368
      #dev.children = []
5369
      #cfg.Update(instance)
5370

    
5371
      # ok, we created the new LVs, so now we know we have the needed
5372
      # storage; as such, we proceed on the target node to rename
5373
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5374
      # using the assumption that logical_id == physical_id (which in
5375
      # turn is the unique_id on that node)
5376

    
5377
      # FIXME(iustin): use a better name for the replaced LVs
5378
      temp_suffix = int(time.time())
5379
      ren_fn = lambda d, suff: (d.physical_id[0],
5380
                                d.physical_id[1] + "_replaced-%s" % suff)
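
      # Example rename (hypothetical names): with temp_suffix 1234567890 an
      # old LV with physical_id ("xenvg", "abc123.disk0_data") is renamed to
      # ("xenvg", "abc123.disk0_data_replaced-1234567890"); the volume group
      # component stays unchanged.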
5381
      # build the rename list based on what LVs exist on the node
5382
      rlist = []
5383
      for to_ren in old_lvs:
5384
        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
5385
        if not result.RemoteFailMsg() and result.payload:
5386
          # device exists
5387
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
5388

    
5389
      info("renaming the old LVs on the target node")
5390
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5391
      result.Raise()
5392
      if not result.data:
5393
        raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
5394
      # now we rename the new LVs to the old LVs
5395
      info("renaming the new LVs on the target node")
5396
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
5397
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5398
      result.Raise()
5399
      if not result.data:
5400
        raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
5401

    
5402
      for old, new in zip(old_lvs, new_lvs):
5403
        new.logical_id = old.logical_id
5404
        cfg.SetDiskID(new, tgt_node)
5405

    
5406
      for disk in old_lvs:
5407
        disk.logical_id = ren_fn(disk, temp_suffix)
5408
        cfg.SetDiskID(disk, tgt_node)
5409

    
5410
      # now that the new lvs have the old name, we can add them to the device
5411
      info("adding new mirror component on %s" % tgt_node)
5412
      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
5413
      if result.failed or not result.data:
5414
        for new_lv in new_lvs:
5415
          msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
5416
          if msg:
5417
            warning("Can't rollback device %s: %s", dev, msg,
5418
                    hint="manually clean up the unused logical volumes")
5419
        raise errors.OpExecError("Can't add local storage to drbd")
5420

    
5421
      dev.children = new_lvs
5422
      cfg.Update(instance)
5423

    
5424
    # Step: wait for sync
5425

    
5426
    # this can fail as the old devices are degraded and _WaitForSync
5427
    # does a combined result over all disks, so we don't check its
5428
    # return value
5429
    self.proc.LogStep(5, steps_total, "sync devices")
5430
    _WaitForSync(self, instance, unlock=True)
5431

    
5432
    # so check manually all the devices
5433
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5434
      cfg.SetDiskID(dev, instance.primary_node)
5435
      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
5436
      msg = result.RemoteFailMsg()
5437
      if not msg and not result.payload:
5438
        msg = "disk not found"
5439
      if msg:
5440
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5441
                                 (name, msg))
5442
      if result.payload[5]:
5443
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5444

    
5445
    # Step: remove old storage
5446
    self.proc.LogStep(6, steps_total, "removing old storage")
5447
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5448
      info("remove logical volumes for %s" % name)
5449
      for lv in old_lvs:
5450
        cfg.SetDiskID(lv, tgt_node)
5451
        msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
5452
        if msg:
5453
          warning("Can't remove old LV: %s" % msg,
5454
                  hint="manually remove unused LVs")
5455
          continue
5456

    
5457
  def _ExecD8Secondary(self, feedback_fn):
5458
    """Replace the secondary node for drbd8.
5459

5460
    The algorithm for replace is quite complicated:
5461
      - for all disks of the instance:
5462
        - create new LVs on the new node with same names
5463
        - shutdown the drbd device on the old secondary
5464
        - disconnect the drbd network on the primary
5465
        - create the drbd device on the new secondary
5466
        - network attach the drbd on the primary, using an artifice:
5467
          the drbd code for Attach() will connect to the network if it
5468
          finds a device which is connected to the good local disks but
5469
          not network enabled
5470
      - wait for sync across all devices
5471
      - remove all disks from the old secondary
5472

5473
    Failures are not very well handled.
5474

5475
    """
5476
    steps_total = 6
5477
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5478
    instance = self.instance
5479
    iv_names = {}
5480
    # start of work
5481
    cfg = self.cfg
5482
    old_node = self.tgt_node
5483
    new_node = self.new_node
5484
    pri_node = instance.primary_node
5485
    nodes_ip = {
5486
      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
5487
      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
5488
      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
5489
      }
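
    # nodes_ip maps each node name to its secondary IP (the address used for
    # DRBD replication); these are the endpoints used below when detaching
    # and re-attaching the DRBD network.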
5490

    
5491
    # Step: check device activation
5492
    self.proc.LogStep(1, steps_total, "check device existence")
5493
    info("checking volume groups")
5494
    my_vg = cfg.GetVGName()
5495
    results = self.rpc.call_vg_list([pri_node, new_node])
5496
    for node in pri_node, new_node:
5497
      res = results[node]
5498
      if res.failed or not res.data or my_vg not in res.data:
5499
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5500
                                 (my_vg, node))
5501
    for idx, dev in enumerate(instance.disks):
5502
      if idx not in self.op.disks:
5503
        continue
5504
      info("checking disk/%d on %s" % (idx, pri_node))
5505
      cfg.SetDiskID(dev, pri_node)
5506
      result = self.rpc.call_blockdev_find(pri_node, dev)
5507
      msg = result.RemoteFailMsg()
5508
      if not msg and not result.payload:
5509
        msg = "disk not found"
5510
      if msg:
5511
        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5512
                                 (idx, pri_node, msg))
5513

    
5514
    # Step: check other node consistency
5515
    self.proc.LogStep(2, steps_total, "check peer consistency")
5516
    for idx, dev in enumerate(instance.disks):
5517
      if idx not in self.op.disks:
5518
        continue
5519
      info("checking disk/%d consistency on %s" % (idx, pri_node))
5520
      if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
5521
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
5522
                                 " unsafe to replace the secondary" %
5523
                                 pri_node)
5524

    
5525
    # Step: create new storage
5526
    self.proc.LogStep(3, steps_total, "allocate new storage")
5527
    for idx, dev in enumerate(instance.disks):
5528
      info("adding new local storage on %s for disk/%d" %
5529
           (new_node, idx))
5530
      # we pass force_create=True to force LVM creation
5531
      for new_lv in dev.children:
5532
        _CreateBlockDev(self, new_node, instance, new_lv, True,
5533
                        _GetInstanceInfoText(instance), False)
5534

    
5535
    # Step 4: dbrd minors and drbd setups changes
5536
    # after this, we must manually remove the drbd minors on both the
5537
    # error and the success paths
5538
    minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks],
5539
                                   instance.name)
5540
    logging.debug("Allocated minors %s" % (minors,))
5541
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
5542
    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
5543
      size = dev.size
5544
      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
5545
      # create new devices on new_node; note that we create two IDs:
5546
      # one without port, so the drbd will be activated without
5547
      # networking information on the new node at this stage, and one
5548
      # with network, for the latter activation in step 4
5549
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
5550
      if pri_node == o_node1:
5551
        p_minor = o_minor1
5552
      else:
5553
        p_minor = o_minor2
5554

    
5555
      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
5556
      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
5557

    
5558
      iv_names[idx] = (dev, dev.children, new_net_id)
5559
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
5560
                    new_net_id)
5561
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
5562
                              logical_id=new_alone_id,
5563
                              children=dev.children,
5564
                              size=dev.size)
5565
      try:
5566
        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
5567
                              _GetInstanceInfoText(instance), False)
5568
      except errors.GenericError:
5569
        self.cfg.ReleaseDRBDMinors(instance.name)
5570
        raise
5571

    
5572
    for idx, dev in enumerate(instance.disks):
5573
      # we have new devices, shutdown the drbd on the old secondary
5574
      info("shutting down drbd for disk/%d on old node" % idx)
5575
      cfg.SetDiskID(dev, old_node)
5576
      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
5577
      if msg:
5578
        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
5579
                (idx, msg),
5580
                hint="Please cleanup this device manually as soon as possible")
5581

    
5582
    info("detaching primary drbds from the network (=> standalone)")
5583
    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
5584
                                               instance.disks)[pri_node]
5585

    
5586
    msg = result.RemoteFailMsg()
5587
    if msg:
5588
      # detaches didn't succeed (unlikely)
5589
      self.cfg.ReleaseDRBDMinors(instance.name)
5590
      raise errors.OpExecError("Can't detach the disks from the network on"
5591
                               " old node: %s" % (msg,))
5592

    
5593
    # if we managed to detach at least one, we update all the disks of
5594
    # the instance to point to the new secondary
5595
    info("updating instance configuration")
5596
    for dev, _, new_logical_id in iv_names.itervalues():
5597
      dev.logical_id = new_logical_id
5598
      cfg.SetDiskID(dev, pri_node)
5599
    cfg.Update(instance)
5600

    
5601
    # and now perform the drbd attach
5602
    info("attaching primary drbds to new secondary (standalone => connected)")
5603
    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
5604
                                           instance.disks, instance.name,
5605
                                           False)
5606
    for to_node, to_result in result.items():
5607
      msg = to_result.RemoteFailMsg()
5608
      if msg:
5609
        warning("can't attach drbd disks on node %s: %s", to_node, msg,
5610
                hint="please do a gnt-instance info to see the"
5611
                " status of disks")
5612

    
5613
    # this can fail as the old devices are degraded and _WaitForSync
5614
    # does a combined result over all disks, so we don't check its
5615
    # return value
5616
    self.proc.LogStep(5, steps_total, "sync devices")
5617
    _WaitForSync(self, instance, unlock=True)
5618

    
5619
    # so check manually all the devices
5620
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5621
      cfg.SetDiskID(dev, pri_node)
5622
      result = self.rpc.call_blockdev_find(pri_node, dev)
5623
      msg = result.RemoteFailMsg()
5624
      if not msg and not result.payload:
5625
        msg = "disk not found"
5626
      if msg:
5627
        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
5628
                                 (idx, msg))
5629
      if result.payload[5]:
5630
        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
5631

    
5632
    self.proc.LogStep(6, steps_total, "removing old storage")
5633
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5634
      info("remove logical volumes for disk/%d" % idx)
5635
      for lv in old_lvs:
5636
        cfg.SetDiskID(lv, old_node)
5637
        msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
5638
        if msg:
5639
          warning("Can't remove LV on old secondary: %s", msg,
5640
                  hint="Cleanup stale volumes by hand")
5641

    
5642
  def Exec(self, feedback_fn):
5643
    """Execute disk replacement.
5644

5645
    This dispatches the disk replacement to the appropriate handler.
5646

5647
    """
5648
    instance = self.instance
5649

    
5650
    # Activate the instance disks if we're replacing them on a down instance
5651
    if not instance.admin_up:
5652
      _StartInstanceDisks(self, instance, True)
5653

    
5654
    if self.op.mode == constants.REPLACE_DISK_CHG:
5655
      fn = self._ExecD8Secondary
5656
    else:
5657
      fn = self._ExecD8DiskOnly
5658

    
5659
    ret = fn(feedback_fn)
5660

    
5661
    # Deactivate the instance disks if we're replacing them on a down instance
5662
    if not instance.admin_up:
5663
      _SafeShutdownInstanceDisks(self, instance)
5664

    
5665
    return ret
5666

    
5667

    
5668
class LUGrowDisk(LogicalUnit):
5669
  """Grow a disk of an instance.
5670

5671
  """
5672
  HPATH = "disk-grow"
5673
  HTYPE = constants.HTYPE_INSTANCE
5674
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
5675
  REQ_BGL = False
5676

    
5677
  def ExpandNames(self):
5678
    self._ExpandAndLockInstance()
5679
    self.needed_locks[locking.LEVEL_NODE] = []
5680
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5681

    
5682
  def DeclareLocks(self, level):
5683
    if level == locking.LEVEL_NODE:
5684
      self._LockInstancesNodes()
5685

    
5686
  def BuildHooksEnv(self):
5687
    """Build hooks env.
5688

5689
    This runs on the master, the primary and all the secondaries.
5690

5691
    """
5692
    env = {
5693
      "DISK": self.op.disk,
5694
      "AMOUNT": self.op.amount,
5695
      }
5696
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5697
    nl = [
5698
      self.cfg.GetMasterNode(),
5699
      self.instance.primary_node,
5700
      ]
5701
    return env, nl, nl
5702

    
5703
  def CheckPrereq(self):
5704
    """Check prerequisites.
5705

5706
    This checks that the instance is in the cluster.
5707

5708
    """
5709
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5710
    assert instance is not None, \
5711
      "Cannot retrieve locked instance %s" % self.op.instance_name
5712
    nodenames = list(instance.all_nodes)
5713
    for node in nodenames:
5714
      _CheckNodeOnline(self, node)
5715

    
5716

    
5717
    self.instance = instance
5718

    
5719
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
5720
      raise errors.OpPrereqError("Instance's disk layout does not support"
5721
                                 " growing.")
5722

    
5723
    self.disk = instance.FindDisk(self.op.disk)
5724

    
5725
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5726
                                       instance.hypervisor)
5727
    for node in nodenames:
5728
      info = nodeinfo[node]
5729
      if info.failed or not info.data:
5730
        raise errors.OpPrereqError("Cannot get current information"
5731
                                   " from node '%s'" % node)
5732
      vg_free = info.data.get('vg_free', None)
5733
      if not isinstance(vg_free, int):
5734
        raise errors.OpPrereqError("Can't compute free disk space on"
5735
                                   " node %s" % node)
5736
      if self.op.amount > vg_free:
5737
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
5738
                                   " %d MiB available, %d MiB required" %
5739
                                   (node, vg_free, self.op.amount))
5740

    
5741
  def Exec(self, feedback_fn):
5742
    """Execute disk grow.
5743

5744
    """
5745
    instance = self.instance
5746
    disk = self.disk
5747
    for node in instance.all_nodes:
5748
      self.cfg.SetDiskID(disk, node)
5749
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
5750
      msg = result.RemoteFailMsg()
5751
      if msg:
5752
        raise errors.OpExecError("Grow request failed to node %s: %s" %
5753
                                 (node, msg))
5754
    disk.RecordGrow(self.op.amount)
5755
    self.cfg.Update(instance)
5756
    if self.op.wait_for_sync:
5757
      disk_abort = not _WaitForSync(self, instance)
5758
      if disk_abort:
5759
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
5760
                             " status.\nPlease check the instance.")
5761

    
5762

    
5763
class LUQueryInstanceData(NoHooksLU):
5764
  """Query runtime instance data.
5765

5766
  """
5767
  _OP_REQP = ["instances", "static"]
5768
  REQ_BGL = False
5769

    
5770
  def ExpandNames(self):
5771
    self.needed_locks = {}
5772
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
5773

    
5774
    if not isinstance(self.op.instances, list):
5775
      raise errors.OpPrereqError("Invalid argument type 'instances'")
5776

    
5777
    if self.op.instances:
5778
      self.wanted_names = []
5779
      for name in self.op.instances:
5780
        full_name = self.cfg.ExpandInstanceName(name)
5781
        if full_name is None:
5782
          raise errors.OpPrereqError("Instance '%s' not known" % name)
5783
        self.wanted_names.append(full_name)
5784
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
5785
    else:
5786
      self.wanted_names = None
5787
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5788

    
5789
    self.needed_locks[locking.LEVEL_NODE] = []
5790
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5791

    
5792
  def DeclareLocks(self, level):
5793
    if level == locking.LEVEL_NODE:
5794
      self._LockInstancesNodes()
5795

    
5796
  def CheckPrereq(self):
5797
    """Check prerequisites.
5798

5799
    This only checks the optional instance list against the existing names.
5800

5801
    """
5802
    if self.wanted_names is None:
5803
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5804

    
5805
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
5806
                             in self.wanted_names]
5807
    return
5808

    
5809
  def _ComputeDiskStatus(self, instance, snode, dev):
5810
    """Compute block device status.
5811

5812
    """
5813
    static = self.op.static
5814
    if not static:
5815
      self.cfg.SetDiskID(dev, instance.primary_node)
5816
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
5817
      if dev_pstatus.offline:
5818
        dev_pstatus = None
5819
      else:
5820
        msg = dev_pstatus.RemoteFailMsg()
5821
        if msg:
5822
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5823
                                   (instance.name, msg))
5824
        dev_pstatus = dev_pstatus.payload
5825
    else:
5826
      dev_pstatus = None
5827

    
5828
    if dev.dev_type in constants.LDS_DRBD:
5829
      # we change the snode then (otherwise we use the one passed in)
5830
      if dev.logical_id[0] == instance.primary_node:
5831
        snode = dev.logical_id[1]
5832
      else:
5833
        snode = dev.logical_id[0]
5834

    
5835
    if snode and not static:
5836
      self.cfg.SetDiskID(dev, snode)
5837
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
5838
      if dev_sstatus.offline:
5839
        dev_sstatus = None
5840
      else:
5841
        msg = dev_sstatus.RemoteFailMsg()
5842
        if msg:
5843
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5844
                                   (instance.name, msg))
5845
        dev_sstatus = dev_sstatus.payload
5846
    else:
5847
      dev_sstatus = None
5848

    
5849
    if dev.children:
5850
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
5851
                      for child in dev.children]
5852
    else:
5853
      dev_children = []
5854

    
5855
    data = {
5856
      "iv_name": dev.iv_name,
5857
      "dev_type": dev.dev_type,
5858
      "logical_id": dev.logical_id,
5859
      "physical_id": dev.physical_id,
5860
      "pstatus": dev_pstatus,
5861
      "sstatus": dev_sstatus,
5862
      "children": dev_children,
5863
      "mode": dev.mode,
5864
      "size": dev.size,
5865
      }
5866

    
5867
    return data
5868

    
5869
  def Exec(self, feedback_fn):
5870
    """Gather and return data"""
5871
    result = {}
5872

    
5873
    cluster = self.cfg.GetClusterInfo()
5874

    
5875
    for instance in self.wanted_instances:
5876
      if not self.op.static:
5877
        remote_info = self.rpc.call_instance_info(instance.primary_node,
5878
                                                  instance.name,
5879
                                                  instance.hypervisor)
5880
        remote_info.Raise()
5881
        remote_info = remote_info.data
5882
        if remote_info and "state" in remote_info:
5883
          remote_state = "up"
5884
        else:
5885
          remote_state = "down"
5886
      else:
5887
        remote_state = None
5888
      if instance.admin_up:
5889
        config_state = "up"
5890
      else:
5891
        config_state = "down"
5892

    
5893
      disks = [self._ComputeDiskStatus(instance, None, device)
5894
               for device in instance.disks]
5895

    
5896
      idict = {
5897
        "name": instance.name,
5898
        "config_state": config_state,
5899
        "run_state": remote_state,
5900
        "pnode": instance.primary_node,
5901
        "snodes": instance.secondary_nodes,
5902
        "os": instance.os,
5903
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
5904
        "disks": disks,
5905
        "hypervisor": instance.hypervisor,
5906
        "network_port": instance.network_port,
5907
        "hv_instance": instance.hvparams,
5908
        "hv_actual": cluster.FillHV(instance),
5909
        "be_instance": instance.beparams,
5910
        "be_actual": cluster.FillBE(instance),
5911
        }
5912

    
5913
      result[instance.name] = idict
5914

    
5915
    return result
5916

    
5917

    
5918
class LUSetInstanceParams(LogicalUnit):
5919
  """Modifies an instances's parameters.
5920

5921
  """
5922
  HPATH = "instance-modify"
5923
  HTYPE = constants.HTYPE_INSTANCE
5924
  _OP_REQP = ["instance_name"]
5925
  REQ_BGL = False
5926

    
5927
  def CheckArguments(self):
5928
    if not hasattr(self.op, 'nics'):
5929
      self.op.nics = []
5930
    if not hasattr(self.op, 'disks'):
5931
      self.op.disks = []
5932
    if not hasattr(self.op, 'beparams'):
5933
      self.op.beparams = {}
5934
    if not hasattr(self.op, 'hvparams'):
5935
      self.op.hvparams = {}
5936
    self.op.force = getattr(self.op, "force", False)
5937
    if not (self.op.nics or self.op.disks or
5938
            self.op.hvparams or self.op.beparams):
5939
      raise errors.OpPrereqError("No changes submitted")
5940

    
5941
    # Disk validation
5942
    disk_addremove = 0
5943
    for disk_op, disk_dict in self.op.disks:
5944
      if disk_op == constants.DDM_REMOVE:
5945
        disk_addremove += 1
5946
        continue
5947
      elif disk_op == constants.DDM_ADD:
5948
        disk_addremove += 1
5949
      else:
5950
        if not isinstance(disk_op, int):
5951
          raise errors.OpPrereqError("Invalid disk index")
5952
      if disk_op == constants.DDM_ADD:
5953
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
5954
        if mode not in constants.DISK_ACCESS_SET:
5955
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
5956
        size = disk_dict.get('size', None)
5957
        if size is None:
5958
          raise errors.OpPrereqError("Required disk parameter size missing")
5959
        try:
5960
          size = int(size)
5961
        except ValueError, err:
5962
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
5963
                                     str(err))
5964
        disk_dict['size'] = size
5965
      else:
5966
        # modification of disk
5967
        if 'size' in disk_dict:
5968
          raise errors.OpPrereqError("Disk size change not possible, use"
5969
                                     " grow-disk")
5970

    
5971
    if disk_addremove > 1:
5972
      raise errors.OpPrereqError("Only one disk add or remove operation"
5973
                                 " supported at a time")
5974

    
5975
    # NIC validation
5976
    nic_addremove = 0
5977
    for nic_op, nic_dict in self.op.nics:
5978
      if nic_op == constants.DDM_REMOVE:
5979
        nic_addremove += 1
5980
        continue
5981
      elif nic_op == constants.DDM_ADD:
5982
        nic_addremove += 1
5983
      else:
5984
        if not isinstance(nic_op, int):
5985
          raise errors.OpPrereqError("Invalid nic index")
5986

    
5987
      # nic_dict should be a dict
5988
      nic_ip = nic_dict.get('ip', None)
5989
      if nic_ip is not None:
5990
        if nic_ip.lower() == constants.VALUE_NONE:
5991
          nic_dict['ip'] = None
5992
        else:
5993
          if not utils.IsValidIP(nic_ip):
5994
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
5995

    
5996
      if nic_op == constants.DDM_ADD:
5997
        nic_bridge = nic_dict.get('bridge', None)
5998
        if nic_bridge is None:
5999
          nic_dict['bridge'] = self.cfg.GetDefBridge()
6000
        nic_mac = nic_dict.get('mac', None)
6001
        if nic_mac is None:
6002
          nic_dict['mac'] = constants.VALUE_AUTO
6003

    
6004
      if 'mac' in nic_dict:
6005
        nic_mac = nic_dict['mac']
6006
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6007
          if not utils.IsValidMac(nic_mac):
6008
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
6009
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
6010
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
6011
                                     " modifying an existing nic")
6012

    
6013
    if nic_addremove > 1:
6014
      raise errors.OpPrereqError("Only one NIC add or remove operation"
6015
                                 " supported at a time")
6016

    
6017
  def ExpandNames(self):
6018
    self._ExpandAndLockInstance()
6019
    self.needed_locks[locking.LEVEL_NODE] = []
6020
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6021

    
6022
  def DeclareLocks(self, level):
6023
    if level == locking.LEVEL_NODE:
6024
      self._LockInstancesNodes()
6025

    
6026
  def BuildHooksEnv(self):
6027
    """Build hooks env.
6028

6029
    This runs on the master, primary and secondaries.
6030

6031
    """
6032
    args = dict()
6033
    if constants.BE_MEMORY in self.be_new:
6034
      args['memory'] = self.be_new[constants.BE_MEMORY]
6035
    if constants.BE_VCPUS in self.be_new:
6036
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
6037
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
6038
    # information at all.
6039
    if self.op.nics:
6040
      args['nics'] = []
6041
      nic_override = dict(self.op.nics)
6042
      for idx, nic in enumerate(self.instance.nics):
6043
        if idx in nic_override:
6044
          this_nic_override = nic_override[idx]
6045
        else:
6046
          this_nic_override = {}
6047
        if 'ip' in this_nic_override:
6048
          ip = this_nic_override['ip']
6049
        else:
6050
          ip = nic.ip
6051
        if 'bridge' in this_nic_override:
6052
          bridge = this_nic_override['bridge']
6053
        else:
6054
          bridge = nic.bridge
6055
        if 'mac' in this_nic_override:
6056
          mac = this_nic_override['mac']
6057
        else:
6058
          mac = nic.mac
6059
        args['nics'].append((ip, bridge, mac))
6060
      if constants.DDM_ADD in nic_override:
6061
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6062
        bridge = nic_override[constants.DDM_ADD]['bridge']
6063
        mac = nic_override[constants.DDM_ADD]['mac']
6064
        args['nics'].append((ip, bridge, mac))
6065
      elif constants.DDM_REMOVE in nic_override:
6066
        del args['nics'][-1]
6067

    
6068
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6069
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6070
    return env, nl, nl
6071

    
6072
  def CheckPrereq(self):
6073
    """Check prerequisites.
6074

6075
    This only checks the instance list against the existing names.
6076

6077
    """
6078
    force = self.force = self.op.force
6079

    
6080
    # checking the new params on the primary/secondary nodes
6081

    
6082
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6083
    assert self.instance is not None, \
6084
      "Cannot retrieve locked instance %s" % self.op.instance_name
6085
    pnode = instance.primary_node
6086
    nodelist = list(instance.all_nodes)
6087

    
6088
    # hvparams processing
6089
    if self.op.hvparams:
6090
      i_hvdict = copy.deepcopy(instance.hvparams)
6091
      for key, val in self.op.hvparams.iteritems():
6092
        if val == constants.VALUE_DEFAULT:
6093
          try:
6094
            del i_hvdict[key]
6095
          except KeyError:
6096
            pass
6097
        else:
6098
          i_hvdict[key] = val
6099
      cluster = self.cfg.GetClusterInfo()
6100
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
6101
      hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
6102
                                i_hvdict)
6103
      # local check
6104
      hypervisor.GetHypervisor(
6105
        instance.hypervisor).CheckParameterSyntax(hv_new)
6106
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6107
      self.hv_new = hv_new # the new actual values
6108
      self.hv_inst = i_hvdict # the new dict (without defaults)
6109
    else:
6110
      self.hv_new = self.hv_inst = {}
6111

    
6112
    # beparams processing
6113
    if self.op.beparams:
6114
      i_bedict = copy.deepcopy(instance.beparams)
6115
      for key, val in self.op.beparams.iteritems():
6116
        if val == constants.VALUE_DEFAULT:
6117
          try:
6118
            del i_bedict[key]
6119
          except KeyError:
6120
            pass
6121
        else:
6122
          i_bedict[key] = val
6123
      cluster = self.cfg.GetClusterInfo()
6124
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
6125
      be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
6126
                                i_bedict)
6127
      self.be_new = be_new # the new actual values
6128
      self.be_inst = i_bedict # the new dict (without defaults)
6129
    else:
6130
      self.be_new = self.be_inst = {}
6131

    
6132
    self.warn = []
6133

    
6134
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6135
      mem_check_list = [pnode]
6136
      if be_new[constants.BE_AUTO_BALANCE]:
6137
        # either we changed auto_balance to yes or it was from before
6138
        mem_check_list.extend(instance.secondary_nodes)
6139
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6140
                                                  instance.hypervisor)
6141
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6142
                                         instance.hypervisor)
6143
      if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
6144
        # Assume the primary node is unreachable and go ahead
6145
        self.warn.append("Can't get info from primary node %s" % pnode)
6146
      else:
6147
        if not instance_info.failed and instance_info.data:
6148
          current_mem = int(instance_info.data['memory'])
6149
        else:
6150
          # Assume instance not running
6151
          # (there is a slight race condition here, but it's not very probable,
6152
          # and we have no other way to check)
6153
          current_mem = 0
6154
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6155
                    nodeinfo[pnode].data['memory_free'])
6156
        if miss_mem > 0:
6157
          raise errors.OpPrereqError("This change will prevent the instance"
6158
                                     " from starting, due to %d MB of memory"
6159
                                     " missing on its primary node" % miss_mem)
6160

    
6161
      if be_new[constants.BE_AUTO_BALANCE]:
6162
        for node, nres in nodeinfo.iteritems():
6163
          if node not in instance.secondary_nodes:
6164
            continue
6165
          if nres.failed or not isinstance(nres.data, dict):
6166
            self.warn.append("Can't get info from secondary node %s" % node)
6167
          elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
6168
            self.warn.append("Not enough memory to failover instance to"
6169
                             " secondary node %s" % node)
6170

    
6171
    # NIC processing
6172
    for nic_op, nic_dict in self.op.nics:
6173
      if nic_op == constants.DDM_REMOVE:
6174
        if not instance.nics:
6175
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6176
        continue
6177
      if nic_op != constants.DDM_ADD:
6178
        # an existing nic
6179
        if nic_op < 0 or nic_op >= len(instance.nics):
6180
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6181
                                     " are 0 to %d" %
6182
                                     (nic_op, len(instance.nics)))
6183
      if 'bridge' in nic_dict:
6184
        nic_bridge = nic_dict['bridge']
6185
        if nic_bridge is None:
6186
          raise errors.OpPrereqError('Cannot set the nic bridge to None')
6187
        if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
6188
          msg = ("Bridge '%s' doesn't exist on one of"
6189
                 " the instance nodes" % nic_bridge)
6190
          if self.force:
6191
            self.warn.append(msg)
6192
          else:
6193
            raise errors.OpPrereqError(msg)
6194
      if 'mac' in nic_dict:
6195
        nic_mac = nic_dict['mac']
6196
        if nic_mac is None:
6197
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6198
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6199
          # otherwise generate the mac
6200
          nic_dict['mac'] = self.cfg.GenerateMAC()
6201
        else:
6202
          # or validate/reserve the current one
6203
          if self.cfg.IsMacInUse(nic_mac):
6204
            raise errors.OpPrereqError("MAC address %s already in use"
6205
                                       " in cluster" % nic_mac)
6206

    
6207
    # DISK processing
6208
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6209
      raise errors.OpPrereqError("Disk operations not supported for"
6210
                                 " diskless instances")
6211
    for disk_op, disk_dict in self.op.disks:
6212
      if disk_op == constants.DDM_REMOVE:
6213
        if len(instance.disks) == 1:
6214
          raise errors.OpPrereqError("Cannot remove the last disk of"
6215
                                     " an instance")
6216
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6217
        ins_l = ins_l[pnode]
6218
        if ins_l.failed or not isinstance(ins_l.data, list):
6219
          raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
6220
        if instance.name in ins_l.data:
6221
          raise errors.OpPrereqError("Instance is running, can't remove"
6222
                                     " disks.")
6223

    
6224
      if (disk_op == constants.DDM_ADD and
6225
          len(instance.nics) >= constants.MAX_DISKS):
6226
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6227
                                   " add more" % constants.MAX_DISKS)
6228
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6229
        # an existing disk
6230
        if disk_op < 0 or disk_op >= len(instance.disks):
6231
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6232
                                     " are 0 to %d" %
6233
                                     (disk_op, len(instance.disks)))
6234

    
6235
    return
6236

    
6237
  def Exec(self, feedback_fn):
6238
    """Modifies an instance.
6239

6240
    All parameters take effect only at the next restart of the instance.
6241

6242
    """
6243
    # Process here the warnings from CheckPrereq, as we don't have a
6244
    # feedback_fn there.
6245
    for warn in self.warn:
6246
      feedback_fn("WARNING: %s" % warn)
6247

    
6248
    result = []
6249
    instance = self.instance
6250
    # disk changes
6251
    for disk_op, disk_dict in self.op.disks:
6252
      if disk_op == constants.DDM_REMOVE:
6253
        # remove the last disk
6254
        device = instance.disks.pop()
6255
        device_idx = len(instance.disks)
6256
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6257
          self.cfg.SetDiskID(disk, node)
6258
          msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
6259
          if msg:
6260
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6261
                            " continuing anyway", device_idx, node, msg)
6262
        result.append(("disk/%d" % device_idx, "remove"))
6263
      elif disk_op == constants.DDM_ADD:
6264
        # add a new disk
6265
        if instance.disk_template == constants.DT_FILE:
6266
          file_driver, file_path = instance.disks[0].logical_id
6267
          file_path = os.path.dirname(file_path)
6268
        else:
6269
          file_driver = file_path = None
6270
        disk_idx_base = len(instance.disks)
6271
        new_disk = _GenerateDiskTemplate(self,
6272
                                         instance.disk_template,
6273
                                         instance.name, instance.primary_node,
6274
                                         instance.secondary_nodes,
6275
                                         [disk_dict],
6276
                                         file_path,
6277
                                         file_driver,
6278
                                         disk_idx_base)[0]
6279
        instance.disks.append(new_disk)
6280
        info = _GetInstanceInfoText(instance)
6281

    
6282
        logging.info("Creating volume %s for instance %s",
6283
                     new_disk.iv_name, instance.name)
6284
        # Note: this needs to be kept in sync with _CreateDisks
6285
        #HARDCODE
6286
        for node in instance.all_nodes:
6287
          f_create = node == instance.primary_node
6288
          try:
6289
            _CreateBlockDev(self, node, instance, new_disk,
6290
                            f_create, info, f_create)
6291
          except errors.OpExecError, err:
6292
            self.LogWarning("Failed to create volume %s (%s) on"
6293
                            " node %s: %s",
6294
                            new_disk.iv_name, new_disk, node, err)
6295
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6296
                       (new_disk.size, new_disk.mode)))
6297
      else:
6298
        # change a given disk
6299
        instance.disks[disk_op].mode = disk_dict['mode']
6300
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6301
    # NIC changes
6302
    for nic_op, nic_dict in self.op.nics:
6303
      if nic_op == constants.DDM_REMOVE:
6304
        # remove the last nic
6305
        del instance.nics[-1]
6306
        result.append(("nic.%d" % len(instance.nics), "remove"))
6307
      elif nic_op == constants.DDM_ADD:
6308
        # mac and bridge should be set, by now
6309
        mac = nic_dict['mac']
6310
        bridge = nic_dict['bridge']
6311
        new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
6312
                              bridge=bridge)
6313
        instance.nics.append(new_nic)
6314
        result.append(("nic.%d" % (len(instance.nics) - 1),
6315
                       "add:mac=%s,ip=%s,bridge=%s" %
6316
                       (new_nic.mac, new_nic.ip, new_nic.bridge)))
6317
      else:
6318
        # change a given nic
6319
        for key in 'mac', 'ip', 'bridge':
6320
          if key in nic_dict:
6321
            setattr(instance.nics[nic_op], key, nic_dict[key])
6322
            result.append(("nic.%s/%d" % (key, nic_op), nic_dict[key]))
6323

    
6324
    # hvparams changes
6325
    if self.op.hvparams:
6326
      instance.hvparams = self.hv_inst
6327
      for key, val in self.op.hvparams.iteritems():
6328
        result.append(("hv/%s" % key, val))
6329

    
6330
    # beparams changes
6331
    if self.op.beparams:
6332
      instance.beparams = self.be_inst
6333
      for key, val in self.op.beparams.iteritems():
6334
        result.append(("be/%s" % key, val))
6335

    
6336
    self.cfg.Update(instance)
6337

    
6338
    return result
6339

    
6340

    
6341
class LUQueryExports(NoHooksLU):
6342
  """Query the exports list
6343

6344
  """
6345
  _OP_REQP = ['nodes']
6346
  REQ_BGL = False
6347

    
6348
  def ExpandNames(self):
6349
    self.needed_locks = {}
6350
    self.share_locks[locking.LEVEL_NODE] = 1
6351
    if not self.op.nodes:
6352
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6353
    else:
6354
      self.needed_locks[locking.LEVEL_NODE] = \
6355
        _GetWantedNodes(self, self.op.nodes)
6356

    
6357
  def CheckPrereq(self):
6358
    """Check prerequisites.
6359

6360
    """
6361
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6362

    
6363
  def Exec(self, feedback_fn):
6364
    """Compute the list of all the exported system images.
6365

6366
    @rtype: dict
6367
    @return: a dictionary with the structure node->(export-list)
6368
        where export-list is a list of the instances exported on
6369
        that node.
6370

6371
    """
6372
    rpcresult = self.rpc.call_export_list(self.nodes)
6373
    result = {}
6374
    for node in rpcresult:
6375
      if rpcresult[node].failed:
6376
        result[node] = False
6377
      else:
6378
        result[node] = rpcresult[node].data
6379

    
6380
    return result
6381

    
6382

    
6383
class LUExportInstance(LogicalUnit):
6384
  """Export an instance to an image in the cluster.
6385

6386
  """
6387
  HPATH = "instance-export"
6388
  HTYPE = constants.HTYPE_INSTANCE
6389
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6390
  REQ_BGL = False
6391

    
6392
  def ExpandNames(self):
6393
    self._ExpandAndLockInstance()
6394
    # FIXME: lock only instance primary and destination node
6395
    #
6396
    # Sad but true, for now we have do lock all nodes, as we don't know where
6397
    # the previous export might be, and and in this LU we search for it and
6398
    # remove it from its current node. In the future we could fix this by:
6399
    #  - making a tasklet to search (share-lock all), then create the new one,
6400
    #    then one to remove, after
6401
    #  - removing the removal operation altoghether
6402
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6403

    
6404
  def DeclareLocks(self, level):
6405
    """Last minute lock declaration."""
6406
    # All nodes are locked anyway, so nothing to do here.
6407

    
6408
  def BuildHooksEnv(self):
6409
    """Build hooks env.
6410

6411
    This will run on the master, primary node and target node.
6412

6413
    """
6414
    env = {
6415
      "EXPORT_NODE": self.op.target_node,
6416
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6417
      }
6418
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6419
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6420
          self.op.target_node]
6421
    return env, nl, nl
6422

    
6423
  def CheckPrereq(self):
6424
    """Check prerequisites.
6425

6426
    This checks that the instance and node names are valid.
6427

6428
    """
6429
    instance_name = self.op.instance_name
6430
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6431
    assert self.instance is not None, \
6432
          "Cannot retrieve locked instance %s" % self.op.instance_name
6433
    _CheckNodeOnline(self, self.instance.primary_node)
6434

    
6435
    self.dst_node = self.cfg.GetNodeInfo(
6436
      self.cfg.ExpandNodeName(self.op.target_node))
6437

    
6438
    if self.dst_node is None:
6439
      # This is wrong node name, not a non-locked node
6440
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6441
    _CheckNodeOnline(self, self.dst_node.name)
6442
    _CheckNodeNotDrained(self, self.dst_node.name)
6443

    
6444
    # instance disk type verification
6445
    for disk in self.instance.disks:
6446
      if disk.dev_type == constants.LD_FILE:
6447
        raise errors.OpPrereqError("Export not supported for instances with"
6448
                                   " file-based disks")
6449

    
6450
  def Exec(self, feedback_fn):
6451
    """Export an instance to an image in the cluster.
6452

6453
    """
6454
    instance = self.instance
6455
    dst_node = self.dst_node
6456
    src_node = instance.primary_node
6457
    if self.op.shutdown:
6458
      # shutdown the instance, but not the disks
6459
      result = self.rpc.call_instance_shutdown(src_node, instance)
6460
      msg = result.RemoteFailMsg()
6461
      if msg:
6462
        raise errors.OpExecError("Could not shutdown instance %s on"
6463
                                 " node %s: %s" %
6464
                                 (instance.name, src_node, msg))
6465

    
6466
    vgname = self.cfg.GetVGName()
6467

    
6468
    snap_disks = []
6469

    
6470
    # set the disks ID correctly since call_instance_start needs the
6471
    # correct drbd minor to create the symlinks
6472
    for disk in instance.disks:
6473
      self.cfg.SetDiskID(disk, src_node)
6474

    
6475
    try:
6476
      for idx, disk in enumerate(instance.disks):
6477
        # new_dev_name will be a snapshot of an lvm leaf of the one we passed
6478
        new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
6479
        if new_dev_name.failed or not new_dev_name.data:
6480
          self.LogWarning("Could not snapshot disk/%d on node %s",
6481
                          idx, src_node)
6482
          snap_disks.append(False)
6483
        else:
6484
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6485
                                 logical_id=(vgname, new_dev_name.data),
6486
                                 physical_id=(vgname, new_dev_name.data),
6487
                                 iv_name=disk.iv_name)
6488
          snap_disks.append(new_dev)
6489

    
6490
    finally:
6491
      if self.op.shutdown and instance.admin_up:
6492
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6493
        msg = result.RemoteFailMsg()
6494
        if msg:
6495
          _ShutdownInstanceDisks(self, instance)
6496
          raise errors.OpExecError("Could not start instance: %s" % msg)
6497

    
6498
    # TODO: check for size
6499

    
6500
    cluster_name = self.cfg.GetClusterName()
6501
    for idx, dev in enumerate(snap_disks):
6502
      if dev:
6503
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6504
                                               instance, cluster_name, idx)
6505
        if result.failed or not result.data:
6506
          self.LogWarning("Could not export disk/%d from node %s to"
6507
                          " node %s", idx, src_node, dst_node.name)
6508
        msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
6509
        if msg:
6510
          self.LogWarning("Could not remove snapshot for disk/%d from node"
6511
                          " %s: %s", idx, src_node, msg)
6512

    
6513
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6514
    if result.failed or not result.data:
6515
      self.LogWarning("Could not finalize export for instance %s on node %s",
6516
                      instance.name, dst_node.name)
6517

    
6518
    nodelist = self.cfg.GetNodeList()
6519
    nodelist.remove(dst_node.name)
6520

    
6521
    # on one-node clusters nodelist will be empty after the removal
6522
    # if we proceed the backup would be removed because OpQueryExports
6523
    # substitutes an empty list with the full cluster node list.
6524
    if nodelist:
6525
      exportlist = self.rpc.call_export_list(nodelist)
6526
      for node in exportlist:
6527
        if exportlist[node].failed:
6528
          continue
6529
        if instance.name in exportlist[node].data:
6530
          if not self.rpc.call_export_remove(node, instance.name):
6531
            self.LogWarning("Could not remove older export for instance %s"
6532
                            " on node %s", instance.name, node)
6533

    
6534

    
6535
class LURemoveExport(NoHooksLU):
6536
  """Remove exports related to the named instance.
6537

6538
  """
6539
  _OP_REQP = ["instance_name"]
6540
  REQ_BGL = False
6541

    
6542
  def ExpandNames(self):
6543
    self.needed_locks = {}
6544
    # We need all nodes to be locked in order for RemoveExport to work, but we
6545
    # don't need to lock the instance itself, as nothing will happen to it (and
6546
    # we can remove exports also for a removed instance)
6547
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6548

    
6549
  def CheckPrereq(self):
6550
    """Check prerequisites.
6551
    """
6552
    pass
6553

    
6554
  def Exec(self, feedback_fn):
6555
    """Remove any export.
6556

6557
    """
6558
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6559
    # If the instance was not found we'll try with the name that was passed in.
6560
    # This will only work if it was an FQDN, though.
6561
    fqdn_warn = False
6562
    if not instance_name:
6563
      fqdn_warn = True
6564
      instance_name = self.op.instance_name
6565

    
6566
    exportlist = self.rpc.call_export_list(self.acquired_locks[
6567
      locking.LEVEL_NODE])
6568
    found = False
6569
    for node in exportlist:
6570
      if exportlist[node].failed:
6571
        self.LogWarning("Failed to query node %s, continuing" % node)
6572
        continue
6573
      if instance_name in exportlist[node].data:
6574
        found = True
6575
        result = self.rpc.call_export_remove(node, instance_name)
6576
        if result.failed or not result.data:
6577
          logging.error("Could not remove export for instance %s"
6578
                        " on node %s", instance_name, node)
6579

    
6580
    if fqdn_warn and not found:
6581
      feedback_fn("Export not found. If trying to remove an export belonging"
6582
                  " to a deleted instance please use its Fully Qualified"
6583
                  " Domain Name.")
6584

    
6585

    
6586
class TagsLU(NoHooksLU):
6587
  """Generic tags LU.
6588

6589
  This is an abstract class which is the parent of all the other tags LUs.
6590

6591
  """
6592

    
6593
  def ExpandNames(self):
6594
    self.needed_locks = {}
6595
    if self.op.kind == constants.TAG_NODE:
6596
      name = self.cfg.ExpandNodeName(self.op.name)
6597
      if name is None:
6598
        raise errors.OpPrereqError("Invalid node name (%s)" %
6599
                                   (self.op.name,))
6600
      self.op.name = name
6601
      self.needed_locks[locking.LEVEL_NODE] = name
6602
    elif self.op.kind == constants.TAG_INSTANCE:
6603
      name = self.cfg.ExpandInstanceName(self.op.name)
6604
      if name is None:
6605
        raise errors.OpPrereqError("Invalid instance name (%s)" %
6606
                                   (self.op.name,))
6607
      self.op.name = name
6608
      self.needed_locks[locking.LEVEL_INSTANCE] = name
6609

    
6610
  def CheckPrereq(self):
6611
    """Check prerequisites.
6612

6613
    """
6614
    if self.op.kind == constants.TAG_CLUSTER:
6615
      self.target = self.cfg.GetClusterInfo()
6616
    elif self.op.kind == constants.TAG_NODE:
6617
      self.target = self.cfg.GetNodeInfo(self.op.name)
6618
    elif self.op.kind == constants.TAG_INSTANCE:
6619
      self.target = self.cfg.GetInstanceInfo(self.op.name)
6620
    else:
6621
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
6622
                                 str(self.op.kind))
6623

    
6624

    
6625
class LUGetTags(TagsLU):
6626
  """Returns the tags of a given object.
6627

6628
  """
6629
  _OP_REQP = ["kind", "name"]
6630
  REQ_BGL = False
6631

    
6632
  def Exec(self, feedback_fn):
6633
    """Returns the tag list.
6634

6635
    """
6636
    return list(self.target.GetTags())
6637

    
6638

    
6639
class LUSearchTags(NoHooksLU):
6640
  """Searches the tags for a given pattern.
6641

6642
  """
6643
  _OP_REQP = ["pattern"]
6644
  REQ_BGL = False
6645

    
6646
  def ExpandNames(self):
6647
    self.needed_locks = {}
6648

    
6649
  def CheckPrereq(self):
6650
    """Check prerequisites.
6651

6652
    This checks the pattern passed for validity by compiling it.
6653

6654
    """
6655
    try:
6656
      self.re = re.compile(self.op.pattern)
6657
    except re.error, err:
6658
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6659
                                 (self.op.pattern, err))
6660

    
6661
  def Exec(self, feedback_fn):
6662
    """Returns the tag list.
6663

6664
    """
6665
    cfg = self.cfg
6666
    tgts = [("/cluster", cfg.GetClusterInfo())]
6667
    ilist = cfg.GetAllInstancesInfo().values()
6668
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6669
    nlist = cfg.GetAllNodesInfo().values()
6670
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6671
    results = []
6672
    for path, target in tgts:
6673
      for tag in target.GetTags():
6674
        if self.re.search(tag):
6675
          results.append((path, tag))
6676
    return results
6677

    
6678

    
6679
class LUAddTags(TagsLU):
6680
  """Sets a tag on a given object.
6681

6682
  """
6683
  _OP_REQP = ["kind", "name", "tags"]
6684
  REQ_BGL = False
6685

    
6686
  def CheckPrereq(self):
6687
    """Check prerequisites.
6688

6689
    This checks the type and length of the tag name and value.
6690

6691
    """
6692
    TagsLU.CheckPrereq(self)
6693
    for tag in self.op.tags:
6694
      objects.TaggableObject.ValidateTag(tag)
6695

    
6696
  def Exec(self, feedback_fn):
6697
    """Sets the tag.
6698

6699
    """
6700
    try:
6701
      for tag in self.op.tags:
6702
        self.target.AddTag(tag)
6703
    except errors.TagError, err:
6704
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
6705
    try:
6706
      self.cfg.Update(self.target)
6707
    except errors.ConfigurationError:
6708
      raise errors.OpRetryError("There has been a modification to the"
6709
                                " config file and the operation has been"
6710
                                " aborted. Please retry.")
6711

    
6712

    
6713
class LUDelTags(TagsLU):
6714
  """Delete a list of tags from a given object.
6715

6716
  """
6717
  _OP_REQP = ["kind", "name", "tags"]
6718
  REQ_BGL = False
6719

    
6720
  def CheckPrereq(self):
6721
    """Check prerequisites.
6722

6723
    This checks that we have the given tag.
6724

6725
    """
6726
    TagsLU.CheckPrereq(self)
6727
    for tag in self.op.tags:
6728
      objects.TaggableObject.ValidateTag(tag)
6729
    del_tags = frozenset(self.op.tags)
6730
    cur_tags = self.target.GetTags()
6731
    if not del_tags <= cur_tags:
6732
      diff_tags = del_tags - cur_tags
6733
      diff_names = ["'%s'" % tag for tag in diff_tags]
6734
      diff_names.sort()
6735
      raise errors.OpPrereqError("Tag(s) %s not found" %
6736
                                 (",".join(diff_names)))
6737

    
6738
  def Exec(self, feedback_fn):
6739
    """Remove the tag from the object.
6740

6741
    """
6742
    for tag in self.op.tags:
6743
      self.target.RemoveTag(tag)
6744
    try:
6745
      self.cfg.Update(self.target)
6746
    except errors.ConfigurationError:
6747
      raise errors.OpRetryError("There has been a modification to the"
6748
                                " config file and the operation has been"
6749
                                " aborted. Please retry.")
6750

    
6751

    
6752
class LUTestDelay(NoHooksLU):
6753
  """Sleep for a specified amount of time.
6754

6755
  This LU sleeps on the master and/or nodes for a specified amount of
6756
  time.
6757

6758
  """
6759
  _OP_REQP = ["duration", "on_master", "on_nodes"]
6760
  REQ_BGL = False
6761

    
6762
  def ExpandNames(self):
6763
    """Expand names and set required locks.
6764

6765
    This expands the node list, if any.
6766

6767
    """
6768
    self.needed_locks = {}
6769
    if self.op.on_nodes:
6770
      # _GetWantedNodes can be used here, but is not always appropriate to use
6771
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
6772
      # more information.
6773
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
6774
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
6775

    
6776
  def CheckPrereq(self):
6777
    """Check prerequisites.
6778

6779
    """
6780

    
6781
  def Exec(self, feedback_fn):
6782
    """Do the actual sleep.
6783

6784
    """
6785
    if self.op.on_master:
6786
      if not utils.TestDelay(self.op.duration):
6787
        raise errors.OpExecError("Error during master delay test")
6788
    if self.op.on_nodes:
6789
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
6790
      if not result:
6791
        raise errors.OpExecError("Complete failure from rpc call")
6792
      for node, node_result in result.items():
6793
        node_result.Raise()
6794
        if not node_result.data:
6795
          raise errors.OpExecError("Failure during rpc call to node %s,"
6796
                                   " result: %s" % (node, node_result.data))
6797

    
6798

    
6799
class IAllocator(object):
6800
  """IAllocator framework.
6801

6802
  An IAllocator instance has three sets of attributes:
6803
    - cfg that is needed to query the cluster
6804
    - input data (all members of the _KEYS class attribute are required)
6805
    - four buffer attributes (in|out_data|text), that represent the
6806
      input (to the external script) in text and data structure format,
6807
      and the output from it, again in two formats
6808
    - the result variables from the script (success, info, nodes) for
6809
      easy usage
6810

6811
  """
6812
  _ALLO_KEYS = [
6813
    "mem_size", "disks", "disk_template",
6814
    "os", "tags", "nics", "vcpus", "hypervisor",
6815
    ]
6816
  _RELO_KEYS = [
6817
    "relocate_from",
6818
    ]
6819

    
6820
  def __init__(self, lu, mode, name, **kwargs):
6821
    self.lu = lu
6822
    # init buffer variables
6823
    self.in_text = self.out_text = self.in_data = self.out_data = None
6824
    # init all input fields so that pylint is happy
6825
    self.mode = mode
6826
    self.name = name
6827
    self.mem_size = self.disks = self.disk_template = None
6828
    self.os = self.tags = self.nics = self.vcpus = None
6829
    self.hypervisor = None
6830
    self.relocate_from = None
6831
    # computed fields
6832
    self.required_nodes = None
6833
    # init result fields
6834
    self.success = self.info = self.nodes = None
6835
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6836
      keyset = self._ALLO_KEYS
6837
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
6838
      keyset = self._RELO_KEYS
6839
    else:
6840
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
6841
                                   " IAllocator" % self.mode)
6842
    for key in kwargs:
6843
      if key not in keyset:
6844
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
6845
                                     " IAllocator" % key)
6846
      setattr(self, key, kwargs[key])
6847
    for key in keyset:
6848
      if key not in kwargs:
6849
        raise errors.ProgrammerError("Missing input parameter '%s' to"
6850
                                     " IAllocator" % key)
6851
    self._BuildInputData()
6852

    
6853

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.lu.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
                                           hypervisor_name)
    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise()
        if not isinstance(nresult.data, dict):
          raise errors.OpExecError("Can't get data for node %s" % nname)
        remote_info = nresult.data
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          try:
            remote_info[attr] = int(remote_info[attr])
          except ValueError, err:
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" % (nname, attr, err))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].data:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
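            # 'memory_free' as reported by the node reflects what the instance
            # uses right now; subtract the (non-negative) gap between the
            # configured BE_MEMORY and the memory actually in use, so the
            # allocator plans as if every primary ran at its full commitment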
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # node dynamic data (memory, disk and cpu totals plus derived values)
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
                  for n in iinfo.nics]
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
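
  # Illustrative shape of the document built above (values elided):
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "...",
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodes": {"<node name>": {"tags": [...], "total_memory": ...,
  #                               "free_memory": ..., ...}},
  #     "instances": {"<instance name>": {"memory": ..., "vcpus": ...,
  #                                       "disks": [...], ...}},
  #   }
  # plus a mode-specific "request" entry added by the two helpers below.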

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.lu.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request
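
  # Note that, unlike the "allocate" request above, a "relocate" request
  # carries no instance specs beyond the total disk space: the instance
  # already exists and is fully described under "instances" in the cluster
  # data, so only its name, the required node count and the nodes to move
  # away from are needed.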

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)
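
  # The allocator itself runs on the master node: the iallocator runner RPC
  # is expected to return a (status, stdout, stderr, failure reason) tuple,
  # and the allocator's reply is taken from its stdout.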

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.lu.rpc.call_iallocator_runner
    data = self.in_text

    result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
    result.Raise()

    if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
      raise errors.OpExecError("Invalid result from master iallocator runner")

    rcode, stdout, stderr, fail = result.data

    if rcode == constants.IARUN_NOTFOUND:
      raise errors.OpExecError("Can't find allocator '%s'" % name)
    elif rcode == constants.IARUN_FAILURE:
      raise errors.OpExecError("Instance allocator call failed: %s,"
                               " output: %s" % (fail, stdout+stderr))
    self.out_text = stdout
    if validate:
      self._ValidateResult()
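
  # Illustrative example of a well-formed allocator reply (the 'info' text
  # and node names are made up):
  #   {"success": true, "info": "allocation successful",
  #    "nodes": ["node1.example.com", "node2.example.com"]}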

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
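
  # Sketch (assumed, for illustration): this LU is normally reached through
  # an opcodes.OpTestAllocator opcode carrying the fields checked above, e.g.
  #   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
  #                                mode=constants.IALLOCATOR_MODE_ALLOC,
  #                                name="instance1.example.com",
  #                                mem_size=..., disks=..., disk_template=...,
  #                                os=..., tags=[], nics=..., vcpus=...)
  # With IALLOCATOR_DIR_IN the LU just returns the generated input text; with
  # IALLOCATOR_DIR_OUT it also needs 'allocator' set and returns the raw
  # allocator reply.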