root / lib / cmdlib.py @ 3eccac06


#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import tempfile
import re
import platform
import logging
import copy
import random

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import opcodes
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True
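
  # Illustrative sketch only (not part of the original module): a minimal
  # concurrent LU following the rules listed in the class docstring above.
  # The opcode name and the node_name field are hypothetical.
  #
  #   class LUHypotheticalPowercycleNode(LogicalUnit):
  #     HPATH = "node-powercycle"
  #     HTYPE = constants.HTYPE_NODE
  #     _OP_REQP = ["node_name"]
  #     REQ_BGL = False          # concurrent, so ExpandNames must set locks
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}
  #
  #     def CheckPrereq(self):
  #       _CheckNodeOnline(self, self.op.node_name)
  #
  #     def BuildHooksEnv(self):
  #       return {"OP_TARGET": self.op.node_name}, [], [self.op.node_name]
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("Would power-cycle %s here" % self.op.node_name)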


  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict(((i, 0) for i in locking.LEVELS))
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)
    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes for a phase, an empty list (and not None) should be
    returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
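
  # Illustrative sketch only: how a concurrent LU would typically combine
  # DeclareLocks with the helper above. The opcode and its fields are not
  # part of this module; this only shows the recalculate_locks pattern.
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       # lock the primary and secondary nodes of the instance locked above
  #       self._LockInstancesNodes()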


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)
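
# Illustrative sketch only: typical use of the helper above from a LU's
# CheckPrereq, expanding user-supplied short names. The self.op.nodes field
# is a hypothetical example.
#
#   self.op.nodes = _GetWantedNodes(self, self.op.nodes)
#   # e.g. ["node1"] -> ["node1.example.tld"], sorted with utils.NiceSort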


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))
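
# Illustrative sketch only: how a query LU would typically use the check
# above in CheckPrereq; the field names shown here are just examples.
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dfree", "mfree"),
#                      selected=self.op.output_fields)
#   # raises OpPrereqError listing any selected field not in either set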


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)
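
# Illustrative sketch only: validating an optional boolean flag on an opcode
# from a LU's CheckArguments; "force" is just an example attribute name.
#
#   _CheckBooleanOpField(self.op, "force")
#   # afterwards self.op.force is guaranteed to exist and be True/False/None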


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  return env
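
# Illustrative sketch only: the kind of environment the function above builds
# for a hypothetical single-NIC, single-disk instance (values are made up,
# not taken from a real cluster; the hooks runner later adds the GANETI_
# prefix):
#
#   {
#     "OP_TARGET": "inst1.example.tld",
#     "INSTANCE_NAME": "inst1.example.tld",
#     "INSTANCE_PRIMARY": "node1.example.tld",
#     "INSTANCE_SECONDARIES": "node2.example.tld",
#     "INSTANCE_OS_TYPE": "debootstrap",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_MEMORY": 512,
#     "INSTANCE_VCPUS": 1,
#     "INSTANCE_DISK_TEMPLATE": "drbd",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_IP": "",
#     "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#     "INSTANCE_NIC0_MODE": "bridged",
#     "INSTANCE_NIC0_LINK": "xen-br0",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     "INSTANCE_DISK0_MODE": "rw",
#   }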

def _PreBuildNICHooksList(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _PreBuildNICHooksList(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)
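
# Illustrative sketch only: a LU that wants to advertise a different value to
# its hooks can override selected argument keys; the override shown here is
# just an example.
#
#   env = _BuildInstanceHookEnvByObject(self, instance,
#                                       override={'status': False})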


def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise()
    if not result.data:
      raise errors.OpPrereqError("One or more target bridges %s do not"
                                 " exist on destination node '%s'" %
                                 (brlist, target_node))


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise()
    if not result.data:
      raise errors.OpExecError("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in the form of
        minor: (instance, must_exist), which correspond to instances and
        their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: non master-candidate has old/wrong file"
                        " '%s'" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to, should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad
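
  # Illustrative sketch only, with made-up numbers: if node C is secondary
  # for instances whose primaries live on node B, say inst1 (1024 MB) and
  # inst2 (512 MB), both auto-balanced, then the check above requires
  # node_info['C']['mfree'] >= 1024 + 512 before node B's failovers are
  # considered safe; otherwise an N+1 error is reported for the pair (C, B).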

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase, and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    for node_i in nodeinfo:
      node = node_i.name
      nresult = all_nvinfo[node].data

      if node_i.offline:
        feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      if all_nvinfo[node].failed or not isinstance(nresult, dict):
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
        bad = True
        continue

      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        if instance not in instanceinfo:
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
                      instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      result = self._VerifyNode(node_i, file_names, local_checksums,
                                nresult, feedback_fn, master_files,
                                node_drbd, vg_name)
      bad = bad or result

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, utils.SafeEncode(lvdata)))
        bad = True
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      if not isinstance(idata, list):
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
                    (node,))
        bad = True
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          if (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST]):
            feedback_fn("  - ERROR: node %s didn't return data for the"
                        " volume group '%s' - it is either missing or broken" %
                        (node, vg_name))
            bad = True
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
                    " from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      result =  self._VerifyInstance(instance, inst_config, node_volume,
                                     node_instance, feedback_fn, n_offline)
      bad = bad or result
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      elif pnode not in n_offline:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        elif snode not in n_offline:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))
          bad = True
        if snode in n_offline:
          inst_nodes_offline.append(snode)

      if inst_nodes_offline:
        # warn that the instance lives on offline nodes, and set bad=True
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
                    ", ".join(inst_nodes_offline))
        bad = True

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          if res.failed or res.data is False or not isinstance(res.data, list):
            if res.offline:
              # no need to warn or set fail return value
              continue
            feedback_fn("    Communication failure in hooks execution")
            lu_result = 1
            continue
          for script, hkr, output in res.data:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    """
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst
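
    # Illustrative sketch only, with made-up names: after the loop above, an
    # instance whose mirrored volumes live on node1/node2 would contribute
    # entries like
    #   {("node1.example.tld", "1a2b...disk0_data"): <instance object>,
    #    ("node2.example.tld", "1a2b...disk0_data"): <instance object>}
    # so that each reported LV can be mapped back to its instance below.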

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_volume_list(nodes, vg_name)

    to_act = set()
    for node in nodes:
      # node_volume
      lvs = node_lvs[node]
      if lvs.failed:
        if not lvs.offline:
          self.LogWarning("Connection to node %s failed: %s" %
                          (node, lvs.data))
        continue
      lvs = lvs.data
      if isinstance(lvs, basestring):
        logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
        res_nlvm[node] = lvs
        continue
      elif not isinstance(lvs, dict):
        logging.warning("Connection to node %s failed or invalid data"
                        " returned", node)
        res_nodes.append(node)
        continue

      for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    if result.failed or not result.data:
      raise errors.OpExecError("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.RemoteFailMsg()
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False)
      if result.failed or not result.data:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually.")


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
1449

    
1450
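# Illustrative sketch (not part of the original module): how the recursion in
# _RecursiveCheckIfLVMBased classifies a nested disk. The objects.Disk
# keyword arguments and the LD_DRBD8 constant are assumptions used only for
# this example.
#
#   lv_data = objects.Disk(dev_type=constants.LD_LV, children=[])
#   lv_meta = objects.Disk(dev_type=constants.LD_LV, children=[])
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8,
#                       children=[lv_data, lv_meta])
#   _RecursiveCheckIfLVMBased(drbd)     # True: an LD_LV leaf was found
#   _RecursiveCheckIfLVMBased(lv_data)  # True: the disk itself is LD_LV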

    
1451
class LUSetClusterParams(LogicalUnit):
1452
  """Change the parameters of the cluster.
1453

1454
  """
1455
  HPATH = "cluster-modify"
1456
  HTYPE = constants.HTYPE_CLUSTER
1457
  _OP_REQP = []
1458
  REQ_BGL = False
1459

    
1460
  def CheckArguments(self):
1461
    """Check parameters
1462

1463
    """
1464
    if not hasattr(self.op, "candidate_pool_size"):
1465
      self.op.candidate_pool_size = None
1466
    if self.op.candidate_pool_size is not None:
1467
      try:
1468
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1469
      except (ValueError, TypeError), err:
1470
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1471
                                   str(err))
1472
      if self.op.candidate_pool_size < 1:
1473
        raise errors.OpPrereqError("At least one master candidate needed")
1474

    
1475
  def ExpandNames(self):
1476
    # FIXME: in the future maybe other cluster params won't require checking on
1477
    # all nodes to be modified.
1478
    self.needed_locks = {
1479
      locking.LEVEL_NODE: locking.ALL_SET,
1480
    }
1481
    self.share_locks[locking.LEVEL_NODE] = 1
1482

    
1483
  def BuildHooksEnv(self):
1484
    """Build hooks env.
1485

1486
    """
1487
    env = {
1488
      "OP_TARGET": self.cfg.GetClusterName(),
1489
      "NEW_VG_NAME": self.op.vg_name,
1490
      }
1491
    mn = self.cfg.GetMasterNode()
1492
    return env, [mn], [mn]
1493

    
1494
  def CheckPrereq(self):
1495
    """Check prerequisites.
1496

1497
    This checks whether the given params don't conflict and
1498
    if the given volume group is valid.
1499

1500
    """
1501
    if self.op.vg_name is not None and not self.op.vg_name:
1502
      instances = self.cfg.GetAllInstancesInfo().values()
1503
      for inst in instances:
1504
        for disk in inst.disks:
1505
          if _RecursiveCheckIfLVMBased(disk):
1506
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1507
                                       " lvm-based instances exist")
1508

    
1509
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1510

    
1511
    # if vg_name not None, checks given volume group on all nodes
1512
    if self.op.vg_name:
1513
      vglist = self.rpc.call_vg_list(node_list)
1514
      for node in node_list:
1515
        if vglist[node].failed:
1516
          # ignoring down node
1517
          self.LogWarning("Node %s unreachable/error, ignoring" % node)
1518
          continue
1519
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
1520
                                              self.op.vg_name,
1521
                                              constants.MIN_VG_SIZE)
1522
        if vgstatus:
1523
          raise errors.OpPrereqError("Error on node '%s': %s" %
1524
                                     (node, vgstatus))
1525

    
1526
    self.cluster = cluster = self.cfg.GetClusterInfo()
1527
    # validate params changes
1528
    if self.op.beparams:
1529
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1530
      self.new_beparams = objects.FillDict(
1531
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1532

    
1533
    if self.op.nicparams:
1534
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1535
      self.new_nicparams = objects.FillDict(
1536
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1537
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1538

    
1539
    # hypervisor list/parameters
1540
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1541
    if self.op.hvparams:
1542
      if not isinstance(self.op.hvparams, dict):
1543
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1544
      for hv_name, hv_dict in self.op.hvparams.items():
1545
        if hv_name not in self.new_hvparams:
1546
          self.new_hvparams[hv_name] = hv_dict
1547
        else:
1548
          self.new_hvparams[hv_name].update(hv_dict)
1549

    
1550
    if self.op.enabled_hypervisors is not None:
1551
      self.hv_list = self.op.enabled_hypervisors
1552
    else:
1553
      self.hv_list = cluster.enabled_hypervisors
1554

    
1555
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1556
      # either the enabled list has changed, or the parameters have, validate
1557
      for hv_name, hv_params in self.new_hvparams.items():
1558
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1559
            (self.op.enabled_hypervisors and
1560
             hv_name in self.op.enabled_hypervisors)):
1561
          # either this is a new hypervisor, or its parameters have changed
1562
          hv_class = hypervisor.GetHypervisor(hv_name)
1563
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1564
          hv_class.CheckParameterSyntax(hv_params)
1565
          _CheckHVParams(self, node_list, hv_name, hv_params)
1566

    
1567
  def Exec(self, feedback_fn):
1568
    """Change the parameters of the cluster.
1569

1570
    """
1571
    if self.op.vg_name is not None:
1572
      new_volume = self.op.vg_name
1573
      if not new_volume:
1574
        new_volume = None
1575
      if new_volume != self.cfg.GetVGName():
1576
        self.cfg.SetVGName(new_volume)
1577
      else:
1578
        feedback_fn("Cluster LVM configuration already in desired"
1579
                    " state, not changing")
1580
    if self.op.hvparams:
1581
      self.cluster.hvparams = self.new_hvparams
1582
    if self.op.enabled_hypervisors is not None:
1583
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1584
    if self.op.beparams:
1585
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1586
    if self.op.nicparams:
1587
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1588

    
1589
    if self.op.candidate_pool_size is not None:
1590
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1591

    
1592
    self.cfg.Update(self.cluster)
1593

    
1594
    # we want to update nodes after the cluster so that if any errors
1595
    # happen, we have recorded and saved the cluster info
1596
    if self.op.candidate_pool_size is not None:
1597
      _AdjustCandidatePool(self)
1598

    
1599
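# Illustrative sketch (not part of the original module): the effect of the
# objects.FillDict() calls in LUSetClusterParams.CheckPrereq above, assuming
# FillDict(defaults, custom) returns a copy of the defaults updated with the
# custom values; the keys and numbers below are made up.
#
#   defaults = {"memory": 128, "vcpus": 1, "auto_balance": True}
#   custom = {"memory": 512}
#   objects.FillDict(defaults, custom)
#   # -> {"memory": 512, "vcpus": 1, "auto_balance": True}
#   # i.e. only the submitted beparams/nicparams change; all other cluster
#   # defaults are carried over into self.new_beparams/self.new_nicparams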

    
1600
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1601
  """Distribute additional files which are part of the cluster configuration.
1602

1603
  ConfigWriter takes care of distributing the config and ssconf files, but
1604
  there are more files which should be distributed to all nodes. This function
1605
  makes sure those are copied.
1606

1607
  @param lu: calling logical unit
1608
  @param additional_nodes: list of nodes not in the config to distribute to
1609

1610
  """
1611
  # 1. Gather target nodes
1612
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1613
  dist_nodes = lu.cfg.GetNodeList()
1614
  if additional_nodes is not None:
1615
    dist_nodes.extend(additional_nodes)
1616
  if myself.name in dist_nodes:
1617
    dist_nodes.remove(myself.name)
1618
  # 2. Gather files to distribute
1619
  dist_files = set([constants.ETC_HOSTS,
1620
                    constants.SSH_KNOWN_HOSTS_FILE,
1621
                    constants.RAPI_CERT_FILE,
1622
                    constants.RAPI_USERS_FILE,
1623
                   ])
1624

    
1625
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1626
  for hv_name in enabled_hypervisors:
1627
    hv_class = hypervisor.GetHypervisor(hv_name)
1628
    dist_files.update(hv_class.GetAncillaryFiles())
1629

    
1630
  # 3. Perform the files upload
1631
  for fname in dist_files:
1632
    if os.path.exists(fname):
1633
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1634
      for to_node, to_result in result.items():
1635
        msg = to_result.RemoteFailMsg()
1636
        if msg:
1637
          msg = ("Copy of file %s to node %s failed: %s" %
1638
                 (fname, to_node, msg))
1639
          lu.proc.LogWarning(msg)
1640

    
1641

    
1642
class LURedistributeConfig(NoHooksLU):
1643
  """Force the redistribution of cluster configuration.
1644

1645
  This is a very simple LU.
1646

1647
  """
1648
  _OP_REQP = []
1649
  REQ_BGL = False
1650

    
1651
  def ExpandNames(self):
1652
    self.needed_locks = {
1653
      locking.LEVEL_NODE: locking.ALL_SET,
1654
    }
1655
    self.share_locks[locking.LEVEL_NODE] = 1
1656

    
1657
  def CheckPrereq(self):
1658
    """Check prerequisites.
1659

1660
    """
1661

    
1662
  def Exec(self, feedback_fn):
1663
    """Redistribute the configuration.
1664

1665
    """
1666
    self.cfg.Update(self.cfg.GetClusterInfo())
1667
    _RedistributeAncillaryFiles(self)
1668

    
1669

    
1670
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1671
  """Sleep and poll for an instance's disk to sync.
1672

1673
  """
1674
  if not instance.disks:
1675
    return True
1676

    
1677
  if not oneshot:
1678
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1679

    
1680
  node = instance.primary_node
1681

    
1682
  for dev in instance.disks:
1683
    lu.cfg.SetDiskID(dev, node)
1684

    
1685
  retries = 0
1686
  while True:
1687
    max_time = 0
1688
    done = True
1689
    cumul_degraded = False
1690
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1691
    msg = rstats.RemoteFailMsg()
1692
    if msg:
1693
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1694
      retries += 1
1695
      if retries >= 10:
1696
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1697
                                 " aborting." % node)
1698
      time.sleep(6)
1699
      continue
1700
    rstats = rstats.payload
1701
    retries = 0
1702
    for i, mstat in enumerate(rstats):
1703
      if mstat is None:
1704
        lu.LogWarning("Can't compute data for node %s/%s",
1705
                           node, instance.disks[i].iv_name)
1706
        continue
1707
      # we ignore the ldisk parameter
1708
      perc_done, est_time, is_degraded, _ = mstat
1709
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1710
      if perc_done is not None:
1711
        done = False
1712
        if est_time is not None:
1713
          rem_time = "%d estimated seconds remaining" % est_time
1714
          max_time = est_time
1715
        else:
1716
          rem_time = "no time estimate"
1717
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1718
                        (instance.disks[i].iv_name, perc_done, rem_time))
1719
    if done or oneshot:
1720
      break
1721

    
1722
    time.sleep(min(60, max_time))
1723

    
1724
  if done:
1725
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1726
  return not cumul_degraded
1727

    
1728
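# Illustrative sketch (not part of the original module): how _WaitForSync
# interprets single rows of the call_blockdev_getmirrorstatus payload,
# following the unpacking above (perc_done, est_time, is_degraded, ldisk);
# the sample values are made up.
#
#   (45.5, 130, True, False)    # still syncing: 45.5% done, ~130s estimated,
#                               # so done is set to False for this poll
#   (None, None, False, False)  # in sync: no percentage reported, device ok
#   (None, None, True, False)   # degraded with no resync running: this is
#                               # the case that sets cumul_degraded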

    
1729
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1730
  """Check that mirrors are not degraded.
1731

1732
  The ldisk parameter, if True, will change the test from the
1733
  is_degraded attribute (which represents overall non-ok status for
1734
  the device(s)) to the ldisk (representing the local storage status).
1735

1736
  """
1737
  lu.cfg.SetDiskID(dev, node)
1738
  if ldisk:
1739
    idx = 6
1740
  else:
1741
    idx = 5
1742

    
1743
  result = True
1744
  if on_primary or dev.AssembleOnSecondary():
1745
    rstats = lu.rpc.call_blockdev_find(node, dev)
1746
    msg = rstats.RemoteFailMsg()
1747
    if msg:
1748
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1749
      result = False
1750
    elif not rstats.payload:
1751
      lu.LogWarning("Can't find disk on node %s", node)
1752
      result = False
1753
    else:
1754
      result = result and (not rstats.payload[idx])
1755
  if dev.children:
1756
    for child in dev.children:
1757
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1758

    
1759
  return result
1760

    
1761
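# Illustrative sketch (not part of the original module): the index choice in
# _CheckDiskConsistency assumes the call_blockdev_find payload is a sequence
# in which position 5 holds the overall is_degraded flag and position 6 the
# local-storage (ldisk) status, hence:
#
#   _CheckDiskConsistency(lu, dev, node, True)              # tests payload[5]
#   _CheckDiskConsistency(lu, dev, node, True, ldisk=True)  # tests payload[6]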

    
1762
class LUDiagnoseOS(NoHooksLU):
1763
  """Logical unit for OS diagnose/query.
1764

1765
  """
1766
  _OP_REQP = ["output_fields", "names"]
1767
  REQ_BGL = False
1768
  _FIELDS_STATIC = utils.FieldSet()
1769
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1770

    
1771
  def ExpandNames(self):
1772
    if self.op.names:
1773
      raise errors.OpPrereqError("Selective OS query not supported")
1774

    
1775
    _CheckOutputFields(static=self._FIELDS_STATIC,
1776
                       dynamic=self._FIELDS_DYNAMIC,
1777
                       selected=self.op.output_fields)
1778

    
1779
    # Lock all nodes, in shared mode
1780
    # Temporary removal of locks, should be reverted later
1781
    # TODO: reintroduce locks when they are lighter-weight
1782
    self.needed_locks = {}
1783
    #self.share_locks[locking.LEVEL_NODE] = 1
1784
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1785

    
1786
  def CheckPrereq(self):
1787
    """Check prerequisites.
1788

1789
    """
1790

    
1791
  @staticmethod
1792
  def _DiagnoseByOS(node_list, rlist):
1793
    """Remaps a per-node return list into an a per-os per-node dictionary
1794

1795
    @param node_list: a list with the names of all nodes
1796
    @param rlist: a map with node names as keys and OS objects as values
1797

1798
    @rtype: dict
1799
    @return: a dictionary with osnames as keys and as value another map, with
1800
        nodes as keys and list of OS objects as values, eg::
1801

1802
          {"debian-etch": {"node1": [<object>,...],
1803
                           "node2": [<object>,]}
1804
          }
1805

1806
    """
1807
    all_os = {}
1808
    # we build here the list of nodes that didn't fail the RPC (at RPC
1809
    # level), so that nodes with a non-responding node daemon don't
1810
    # make all OSes invalid
1811
    good_nodes = [node_name for node_name in rlist
1812
                  if not rlist[node_name].failed]
1813
    for node_name, nr in rlist.iteritems():
1814
      if nr.failed or not nr.data:
1815
        continue
1816
      for os_obj in nr.data:
1817
        if os_obj.name not in all_os:
1818
          # build a list of nodes for this os containing empty lists
1819
          # for each node in node_list
1820
          all_os[os_obj.name] = {}
1821
          for nname in good_nodes:
1822
            all_os[os_obj.name][nname] = []
1823
        all_os[os_obj.name][node_name].append(os_obj)
1824
    return all_os
1825

    
1826
  def Exec(self, feedback_fn):
1827
    """Compute the list of OSes.
1828

1829
    """
1830
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1831
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1832
    if node_data == False:
1833
      raise errors.OpExecError("Can't gather the list of OSes")
1834
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1835
    output = []
1836
    for os_name, os_data in pol.iteritems():
1837
      row = []
1838
      for field in self.op.output_fields:
1839
        if field == "name":
1840
          val = os_name
1841
        elif field == "valid":
1842
          val = utils.all([osl and osl[0] for osl in os_data.values()])
1843
        elif field == "node_status":
1844
          val = {}
1845
          for node_name, nos_list in os_data.iteritems():
1846
            val[node_name] = [(v.status, v.path) for v in nos_list]
1847
        else:
1848
          raise errors.ParameterError(field)
1849
        row.append(val)
1850
      output.append(row)
1851

    
1852
    return output
1853

    
1854

    
1855
class LURemoveNode(LogicalUnit):
1856
  """Logical unit for removing a node.
1857

1858
  """
1859
  HPATH = "node-remove"
1860
  HTYPE = constants.HTYPE_NODE
1861
  _OP_REQP = ["node_name"]
1862

    
1863
  def BuildHooksEnv(self):
1864
    """Build hooks env.
1865

1866
    This doesn't run on the target node in the pre phase as a failed
1867
    node would then be impossible to remove.
1868

1869
    """
1870
    env = {
1871
      "OP_TARGET": self.op.node_name,
1872
      "NODE_NAME": self.op.node_name,
1873
      }
1874
    all_nodes = self.cfg.GetNodeList()
1875
    all_nodes.remove(self.op.node_name)
1876
    return env, all_nodes, all_nodes
1877

    
1878
  def CheckPrereq(self):
1879
    """Check prerequisites.
1880

1881
    This checks:
1882
     - the node exists in the configuration
1883
     - it does not have primary or secondary instances
1884
     - it's not the master
1885

1886
    Any errors are signalled by raising errors.OpPrereqError.
1887

1888
    """
1889
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1890
    if node is None:
1891
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1892

    
1893
    instance_list = self.cfg.GetInstanceList()
1894

    
1895
    masternode = self.cfg.GetMasterNode()
1896
    if node.name == masternode:
1897
      raise errors.OpPrereqError("Node is the master node,"
1898
                                 " you need to failover first.")
1899

    
1900
    for instance_name in instance_list:
1901
      instance = self.cfg.GetInstanceInfo(instance_name)
1902
      if node.name in instance.all_nodes:
1903
        raise errors.OpPrereqError("Instance %s is still running on the node,"
1904
                                   " please remove first." % instance_name)
1905
    self.op.node_name = node.name
1906
    self.node = node
1907

    
1908
  def Exec(self, feedback_fn):
1909
    """Removes the node from the cluster.
1910

1911
    """
1912
    node = self.node
1913
    logging.info("Stopping the node daemon and removing configs from node %s",
1914
                 node.name)
1915

    
1916
    self.context.RemoveNode(node.name)
1917

    
1918
    self.rpc.call_node_leave_cluster(node.name)
1919

    
1920
    # Promote nodes to master candidate as needed
1921
    _AdjustCandidatePool(self)
1922

    
1923

    
1924
class LUQueryNodes(NoHooksLU):
1925
  """Logical unit for querying nodes.
1926

1927
  """
1928
  _OP_REQP = ["output_fields", "names", "use_locking"]
1929
  REQ_BGL = False
1930
  _FIELDS_DYNAMIC = utils.FieldSet(
1931
    "dtotal", "dfree",
1932
    "mtotal", "mnode", "mfree",
1933
    "bootid",
1934
    "ctotal", "cnodes", "csockets",
1935
    )
1936

    
1937
  _FIELDS_STATIC = utils.FieldSet(
1938
    "name", "pinst_cnt", "sinst_cnt",
1939
    "pinst_list", "sinst_list",
1940
    "pip", "sip", "tags",
1941
    "serial_no",
1942
    "master_candidate",
1943
    "master",
1944
    "offline",
1945
    "drained",
1946
    )
1947

    
1948
  def ExpandNames(self):
1949
    _CheckOutputFields(static=self._FIELDS_STATIC,
1950
                       dynamic=self._FIELDS_DYNAMIC,
1951
                       selected=self.op.output_fields)
1952

    
1953
    self.needed_locks = {}
1954
    self.share_locks[locking.LEVEL_NODE] = 1
1955

    
1956
    if self.op.names:
1957
      self.wanted = _GetWantedNodes(self, self.op.names)
1958
    else:
1959
      self.wanted = locking.ALL_SET
1960

    
1961
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
1962
    self.do_locking = self.do_node_query and self.op.use_locking
1963
    if self.do_locking:
1964
      # if we don't request only static fields, we need to lock the nodes
1965
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
1966

    
1967

    
1968
  def CheckPrereq(self):
1969
    """Check prerequisites.
1970

1971
    """
1972
    # The node list is validated in _GetWantedNodes if it is non-empty;
1973
    # if it is empty, there is nothing to validate
1974
    pass
1975

    
1976
  def Exec(self, feedback_fn):
1977
    """Computes the list of nodes and their attributes.
1978

1979
    """
1980
    all_info = self.cfg.GetAllNodesInfo()
1981
    if self.do_locking:
1982
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
1983
    elif self.wanted != locking.ALL_SET:
1984
      nodenames = self.wanted
1985
      missing = set(nodenames).difference(all_info.keys())
1986
      if missing:
1987
        raise errors.OpExecError(
1988
          "Some nodes were removed before retrieving their data: %s" % missing)
1989
    else:
1990
      nodenames = all_info.keys()
1991

    
1992
    nodenames = utils.NiceSort(nodenames)
1993
    nodelist = [all_info[name] for name in nodenames]
1994

    
1995
    # begin data gathering
1996

    
1997
    if self.do_node_query:
1998
      live_data = {}
1999
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2000
                                          self.cfg.GetHypervisorType())
2001
      for name in nodenames:
2002
        nodeinfo = node_data[name]
2003
        if not nodeinfo.failed and nodeinfo.data:
2004
          nodeinfo = nodeinfo.data
2005
          fn = utils.TryConvert
2006
          live_data[name] = {
2007
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2008
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2009
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2010
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2011
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2012
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2013
            "bootid": nodeinfo.get('bootid', None),
2014
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2015
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2016
            }
2017
        else:
2018
          live_data[name] = {}
2019
    else:
2020
      live_data = dict.fromkeys(nodenames, {})
2021

    
2022
    node_to_primary = dict([(name, set()) for name in nodenames])
2023
    node_to_secondary = dict([(name, set()) for name in nodenames])
2024

    
2025
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2026
                             "sinst_cnt", "sinst_list"))
2027
    if inst_fields & frozenset(self.op.output_fields):
2028
      instancelist = self.cfg.GetInstanceList()
2029

    
2030
      for instance_name in instancelist:
2031
        inst = self.cfg.GetInstanceInfo(instance_name)
2032
        if inst.primary_node in node_to_primary:
2033
          node_to_primary[inst.primary_node].add(inst.name)
2034
        for secnode in inst.secondary_nodes:
2035
          if secnode in node_to_secondary:
2036
            node_to_secondary[secnode].add(inst.name)
2037

    
2038
    master_node = self.cfg.GetMasterNode()
2039

    
2040
    # end data gathering
2041

    
2042
    output = []
2043
    for node in nodelist:
2044
      node_output = []
2045
      for field in self.op.output_fields:
2046
        if field == "name":
2047
          val = node.name
2048
        elif field == "pinst_list":
2049
          val = list(node_to_primary[node.name])
2050
        elif field == "sinst_list":
2051
          val = list(node_to_secondary[node.name])
2052
        elif field == "pinst_cnt":
2053
          val = len(node_to_primary[node.name])
2054
        elif field == "sinst_cnt":
2055
          val = len(node_to_secondary[node.name])
2056
        elif field == "pip":
2057
          val = node.primary_ip
2058
        elif field == "sip":
2059
          val = node.secondary_ip
2060
        elif field == "tags":
2061
          val = list(node.GetTags())
2062
        elif field == "serial_no":
2063
          val = node.serial_no
2064
        elif field == "master_candidate":
2065
          val = node.master_candidate
2066
        elif field == "master":
2067
          val = node.name == master_node
2068
        elif field == "offline":
2069
          val = node.offline
2070
        elif field == "drained":
2071
          val = node.drained
2072
        elif self._FIELDS_DYNAMIC.Matches(field):
2073
          val = live_data[node.name].get(field, None)
2074
        else:
2075
          raise errors.ParameterError(field)
2076
        node_output.append(val)
2077
      output.append(node_output)
2078

    
2079
    return output
2080

    
2081
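# Illustrative sketch (not part of the original module): which paths the
# flags computed in LUQueryNodes.ExpandNames select; the field names come
# from the _FIELDS_* sets above, while the OpQueryNodes constructor is an
# assumption.
#
#   OpQueryNodes(output_fields=["name", "pip", "sip"], names=[],
#                use_locking=False)
#   # -> static fields only: no node locks and no call_node_info RPC
#
#   OpQueryNodes(output_fields=["name", "mfree", "dtotal"], names=[],
#                use_locking=True)
#   # -> dynamic fields requested: node locks are taken (shared) and
#   #    call_node_info fills live_data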

    
2082
class LUQueryNodeVolumes(NoHooksLU):
2083
  """Logical unit for getting volumes on node(s).
2084

2085
  """
2086
  _OP_REQP = ["nodes", "output_fields"]
2087
  REQ_BGL = False
2088
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2089
  _FIELDS_STATIC = utils.FieldSet("node")
2090

    
2091
  def ExpandNames(self):
2092
    _CheckOutputFields(static=self._FIELDS_STATIC,
2093
                       dynamic=self._FIELDS_DYNAMIC,
2094
                       selected=self.op.output_fields)
2095

    
2096
    self.needed_locks = {}
2097
    self.share_locks[locking.LEVEL_NODE] = 1
2098
    if not self.op.nodes:
2099
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2100
    else:
2101
      self.needed_locks[locking.LEVEL_NODE] = \
2102
        _GetWantedNodes(self, self.op.nodes)
2103

    
2104
  def CheckPrereq(self):
2105
    """Check prerequisites.
2106

2107
    This checks that the fields required are valid output fields.
2108

2109
    """
2110
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2111

    
2112
  def Exec(self, feedback_fn):
2113
    """Computes the list of nodes and their attributes.
2114

2115
    """
2116
    nodenames = self.nodes
2117
    volumes = self.rpc.call_node_volumes(nodenames)
2118

    
2119
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2120
             in self.cfg.GetInstanceList()]
2121

    
2122
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2123

    
2124
    output = []
2125
    for node in nodenames:
2126
      if node not in volumes or volumes[node].failed or not volumes[node].data:
2127
        continue
2128

    
2129
      node_vols = volumes[node].data[:]
2130
      node_vols.sort(key=lambda vol: vol['dev'])
2131

    
2132
      for vol in node_vols:
2133
        node_output = []
2134
        for field in self.op.output_fields:
2135
          if field == "node":
2136
            val = node
2137
          elif field == "phys":
2138
            val = vol['dev']
2139
          elif field == "vg":
2140
            val = vol['vg']
2141
          elif field == "name":
2142
            val = vol['name']
2143
          elif field == "size":
2144
            val = int(float(vol['size']))
2145
          elif field == "instance":
2146
            for inst in ilist:
2147
              if node not in lv_by_node[inst]:
2148
                continue
2149
              if vol['name'] in lv_by_node[inst][node]:
2150
                val = inst.name
2151
                break
2152
            else:
2153
              val = '-'
2154
          else:
2155
            raise errors.ParameterError(field)
2156
          node_output.append(str(val))
2157

    
2158
        output.append(node_output)
2159

    
2160
    return output
2161

    
2162

    
2163
class LUAddNode(LogicalUnit):
2164
  """Logical unit for adding node to the cluster.
2165

2166
  """
2167
  HPATH = "node-add"
2168
  HTYPE = constants.HTYPE_NODE
2169
  _OP_REQP = ["node_name"]
2170

    
2171
  def BuildHooksEnv(self):
2172
    """Build hooks env.
2173

2174
    This will run on all nodes before, and on all nodes + the new node after.
2175

2176
    """
2177
    env = {
2178
      "OP_TARGET": self.op.node_name,
2179
      "NODE_NAME": self.op.node_name,
2180
      "NODE_PIP": self.op.primary_ip,
2181
      "NODE_SIP": self.op.secondary_ip,
2182
      }
2183
    nodes_0 = self.cfg.GetNodeList()
2184
    nodes_1 = nodes_0 + [self.op.node_name, ]
2185
    return env, nodes_0, nodes_1
2186

    
2187
  def CheckPrereq(self):
2188
    """Check prerequisites.
2189

2190
    This checks:
2191
     - the new node is not already in the config
2192
     - it is resolvable
2193
     - its parameters (single/dual homed) match the cluster
2194

2195
    Any errors are signalled by raising errors.OpPrereqError.
2196

2197
    """
2198
    node_name = self.op.node_name
2199
    cfg = self.cfg
2200

    
2201
    dns_data = utils.HostInfo(node_name)
2202

    
2203
    node = dns_data.name
2204
    primary_ip = self.op.primary_ip = dns_data.ip
2205
    secondary_ip = getattr(self.op, "secondary_ip", None)
2206
    if secondary_ip is None:
2207
      secondary_ip = primary_ip
2208
    if not utils.IsValidIP(secondary_ip):
2209
      raise errors.OpPrereqError("Invalid secondary IP given")
2210
    self.op.secondary_ip = secondary_ip
2211

    
2212
    node_list = cfg.GetNodeList()
2213
    if not self.op.readd and node in node_list:
2214
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2215
                                 node)
2216
    elif self.op.readd and node not in node_list:
2217
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2218

    
2219
    for existing_node_name in node_list:
2220
      existing_node = cfg.GetNodeInfo(existing_node_name)
2221

    
2222
      if self.op.readd and node == existing_node_name:
2223
        if (existing_node.primary_ip != primary_ip or
2224
            existing_node.secondary_ip != secondary_ip):
2225
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2226
                                     " address configuration as before")
2227
        continue
2228

    
2229
      if (existing_node.primary_ip == primary_ip or
2230
          existing_node.secondary_ip == primary_ip or
2231
          existing_node.primary_ip == secondary_ip or
2232
          existing_node.secondary_ip == secondary_ip):
2233
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2234
                                   " existing node %s" % existing_node.name)
2235

    
2236
    # check that the type of the node (single versus dual homed) is the
2237
    # same as for the master
2238
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2239
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2240
    newbie_singlehomed = secondary_ip == primary_ip
2241
    if master_singlehomed != newbie_singlehomed:
2242
      if master_singlehomed:
2243
        raise errors.OpPrereqError("The master has no private ip but the"
2244
                                   " new node has one")
2245
      else:
2246
        raise errors.OpPrereqError("The master has a private ip but the"
2247
                                   " new node doesn't have one")
2248

    
2249
    # checks reachability
2250
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2251
      raise errors.OpPrereqError("Node not reachable by ping")
2252

    
2253
    if not newbie_singlehomed:
2254
      # check reachability from my secondary ip to newbie's secondary ip
2255
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2256
                           source=myself.secondary_ip):
2257
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2258
                                   " based ping to noded port")
2259

    
2260
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2261
    mc_now, _ = self.cfg.GetMasterCandidateStats()
2262
    master_candidate = mc_now < cp_size
2263

    
2264
    self.new_node = objects.Node(name=node,
2265
                                 primary_ip=primary_ip,
2266
                                 secondary_ip=secondary_ip,
2267
                                 master_candidate=master_candidate,
2268
                                 offline=False, drained=False)
2269

    
2270
  def Exec(self, feedback_fn):
2271
    """Adds the new node to the cluster.
2272

2273
    """
2274
    new_node = self.new_node
2275
    node = new_node.name
2276

    
2277
    # check connectivity
2278
    result = self.rpc.call_version([node])[node]
2279
    result.Raise()
2280
    if result.data:
2281
      if constants.PROTOCOL_VERSION == result.data:
2282
        logging.info("Communication to node %s fine, sw version %s match",
2283
                     node, result.data)
2284
      else:
2285
        raise errors.OpExecError("Version mismatch master version %s,"
2286
                                 " node version %s" %
2287
                                 (constants.PROTOCOL_VERSION, result.data))
2288
    else:
2289
      raise errors.OpExecError("Cannot get version from the new node")
2290

    
2291
    # setup ssh on node
2292
    logging.info("Copy ssh key to node %s", node)
2293
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2294
    keyarray = []
2295
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2296
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2297
                priv_key, pub_key]
2298

    
2299
    for i in keyfiles:
2300
      f = open(i, 'r')
2301
      try:
2302
        keyarray.append(f.read())
2303
      finally:
2304
        f.close()
2305

    
2306
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2307
                                    keyarray[2],
2308
                                    keyarray[3], keyarray[4], keyarray[5])
2309

    
2310
    msg = result.RemoteFailMsg()
2311
    if msg:
2312
      raise errors.OpExecError("Cannot transfer ssh keys to the"
2313
                               " new node: %s" % msg)
2314

    
2315
    # Add node to our /etc/hosts, and add key to known_hosts
2316
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2317
      utils.AddHostToEtcHosts(new_node.name)
2318

    
2319
    if new_node.secondary_ip != new_node.primary_ip:
2320
      result = self.rpc.call_node_has_ip_address(new_node.name,
2321
                                                 new_node.secondary_ip)
2322
      if result.failed or not result.data:
2323
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2324
                                 " you gave (%s). Please fix and re-run this"
2325
                                 " command." % new_node.secondary_ip)
2326

    
2327
    node_verify_list = [self.cfg.GetMasterNode()]
2328
    node_verify_param = {
2329
      'nodelist': [node],
2330
      # TODO: do a node-net-test as well?
2331
    }
2332

    
2333
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2334
                                       self.cfg.GetClusterName())
2335
    for verifier in node_verify_list:
2336
      if result[verifier].failed or not result[verifier].data:
2337
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
2338
                                 " for remote verification" % verifier)
2339
      if result[verifier].data['nodelist']:
2340
        for failed in result[verifier].data['nodelist']:
2341
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2342
                      (verifier, result[verifier].data['nodelist'][failed]))
2343
        raise errors.OpExecError("ssh/hostname verification failed.")
2344

    
2345
    if self.op.readd:
2346
      _RedistributeAncillaryFiles(self)
2347
      self.context.ReaddNode(new_node)
2348
    else:
2349
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2350
      self.context.AddNode(new_node)
2351

    
2352

    
2353
class LUSetNodeParams(LogicalUnit):
2354
  """Modifies the parameters of a node.
2355

2356
  """
2357
  HPATH = "node-modify"
2358
  HTYPE = constants.HTYPE_NODE
2359
  _OP_REQP = ["node_name"]
2360
  REQ_BGL = False
2361

    
2362
  def CheckArguments(self):
2363
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2364
    if node_name is None:
2365
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2366
    self.op.node_name = node_name
2367
    _CheckBooleanOpField(self.op, 'master_candidate')
2368
    _CheckBooleanOpField(self.op, 'offline')
2369
    _CheckBooleanOpField(self.op, 'drained')
2370
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2371
    if all_mods.count(None) == 3:
2372
      raise errors.OpPrereqError("Please pass at least one modification")
2373
    if all_mods.count(True) > 1:
2374
      raise errors.OpPrereqError("Can't set the node into more than one"
2375
                                 " state at the same time")
2376

    
2377
  def ExpandNames(self):
2378
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2379

    
2380
  def BuildHooksEnv(self):
2381
    """Build hooks env.
2382

2383
    This runs on the master node.
2384

2385
    """
2386
    env = {
2387
      "OP_TARGET": self.op.node_name,
2388
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2389
      "OFFLINE": str(self.op.offline),
2390
      "DRAINED": str(self.op.drained),
2391
      }
2392
    nl = [self.cfg.GetMasterNode(),
2393
          self.op.node_name]
2394
    return env, nl, nl
2395

    
2396
  def CheckPrereq(self):
2397
    """Check prerequisites.
2398

2399
    This only checks the instance list against the existing names.
2400

2401
    """
2402
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2403

    
2404
    if ((self.op.master_candidate == False or self.op.offline == True or
2405
         self.op.drained == True) and node.master_candidate):
2406
      # we will demote the node from master_candidate
2407
      if self.op.node_name == self.cfg.GetMasterNode():
2408
        raise errors.OpPrereqError("The master node has to be a"
2409
                                   " master candidate, online and not drained")
2410
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2411
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2412
      if num_candidates <= cp_size:
2413
        msg = ("Not enough master candidates (desired"
2414
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2415
        if self.op.force:
2416
          self.LogWarning(msg)
2417
        else:
2418
          raise errors.OpPrereqError(msg)
2419

    
2420
    if (self.op.master_candidate == True and
2421
        ((node.offline and not self.op.offline == False) or
2422
         (node.drained and not self.op.drained == False))):
2423
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2424
                                 " to master_candidate" % node.name)
2425

    
2426
    return
2427

    
2428
  def Exec(self, feedback_fn):
2429
    """Modifies a node.
2430

2431
    """
2432
    node = self.node
2433

    
2434
    result = []
2435
    changed_mc = False
2436

    
2437
    if self.op.offline is not None:
2438
      node.offline = self.op.offline
2439
      result.append(("offline", str(self.op.offline)))
2440
      if self.op.offline == True:
2441
        if node.master_candidate:
2442
          node.master_candidate = False
2443
          changed_mc = True
2444
          result.append(("master_candidate", "auto-demotion due to offline"))
2445
        if node.drained:
2446
          node.drained = False
2447
          result.append(("drained", "clear drained status due to offline"))
2448

    
2449
    if self.op.master_candidate is not None:
2450
      node.master_candidate = self.op.master_candidate
2451
      changed_mc = True
2452
      result.append(("master_candidate", str(self.op.master_candidate)))
2453
      if self.op.master_candidate == False:
2454
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2455
        msg = rrc.RemoteFailMsg()
2456
        if msg:
2457
          self.LogWarning("Node failed to demote itself: %s" % msg)
2458

    
2459
    if self.op.drained is not None:
2460
      node.drained = self.op.drained
2461
      result.append(("drained", str(self.op.drained)))
2462
      if self.op.drained == True:
2463
        if node.master_candidate:
2464
          node.master_candidate = False
2465
          changed_mc = True
2466
          result.append(("master_candidate", "auto-demotion due to drain"))
2467
        if node.offline:
2468
          node.offline = False
2469
          result.append(("offline", "clear offline status due to drain"))
2470

    
2471
    # this will trigger configuration file update, if needed
2472
    self.cfg.Update(node)
2473
    # this will trigger job queue propagation or cleanup
2474
    if changed_mc:
2475
      self.context.ReaddNode(node)
2476

    
2477
    return result
2478

    
2479
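# Illustrative sketch (not part of the original module): the argument rules
# enforced by LUSetNodeParams.CheckArguments, written as opcode submissions;
# the OpSetNodeParams name and fields are assumptions based on _OP_REQP and
# the flags checked above.
#
#   OpSetNodeParams(node_name="node3", drained=True)
#   # accepted: exactly one modification requested
#
#   OpSetNodeParams(node_name="node3")
#   # rejected: "Please pass at least one modification"
#
#   OpSetNodeParams(node_name="node3", offline=True, drained=True)
#   # rejected: a node can't be put into more than one state at a time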

    
2480
class LUPowercycleNode(NoHooksLU):
2481
  """Powercycles a node.
2482

2483
  """
2484
  _OP_REQP = ["node_name", "force"]
2485
  REQ_BGL = False
2486

    
2487
  def CheckArguments(self):
2488
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2489
    if node_name is None:
2490
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2491
    self.op.node_name = node_name
2492
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2493
      raise errors.OpPrereqError("The node is the master and the force"
2494
                                 " parameter was not set")
2495

    
2496
  def ExpandNames(self):
2497
    """Locking for PowercycleNode.
2498

2499
    This is a last-resort option and shouldn't block on other
2500
    jobs. Therefore, we grab no locks.
2501

2502
    """
2503
    self.needed_locks = {}
2504

    
2505
  def CheckPrereq(self):
2506
    """Check prerequisites.
2507

2508
    This LU has no prereqs.
2509

2510
    """
2511
    pass
2512

    
2513
  def Exec(self, feedback_fn):
2514
    """Reboots a node.
2515

2516
    """
2517
    result = self.rpc.call_node_powercycle(self.op.node_name,
2518
                                           self.cfg.GetHypervisorType())
2519
    msg = result.RemoteFailMsg()
2520
    if msg:
2521
      raise errors.OpExecError("Failed to schedule the reboot: %s" % msg)
2522
    return result.payload
2523

    
2524

    
2525
class LUQueryClusterInfo(NoHooksLU):
2526
  """Query cluster configuration.
2527

2528
  """
2529
  _OP_REQP = []
2530
  REQ_BGL = False
2531

    
2532
  def ExpandNames(self):
2533
    self.needed_locks = {}
2534

    
2535
  def CheckPrereq(self):
2536
    """No prerequsites needed for this LU.
2537

2538
    """
2539
    pass
2540

    
2541
  def Exec(self, feedback_fn):
2542
    """Return cluster config.
2543

2544
    """
2545
    cluster = self.cfg.GetClusterInfo()
2546
    result = {
2547
      "software_version": constants.RELEASE_VERSION,
2548
      "protocol_version": constants.PROTOCOL_VERSION,
2549
      "config_version": constants.CONFIG_VERSION,
2550
      "os_api_version": constants.OS_API_VERSION,
2551
      "export_version": constants.EXPORT_VERSION,
2552
      "architecture": (platform.architecture()[0], platform.machine()),
2553
      "name": cluster.cluster_name,
2554
      "master": cluster.master_node,
2555
      "default_hypervisor": cluster.default_hypervisor,
2556
      "enabled_hypervisors": cluster.enabled_hypervisors,
2557
      "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
2558
                        for hypervisor in cluster.enabled_hypervisors]),
2559
      "beparams": cluster.beparams,
2560
      "nicparams": cluster.nicparams,
2561
      "candidate_pool_size": cluster.candidate_pool_size,
2562
      "master_netdev": cluster.master_netdev,
2563
      "volume_group_name": cluster.volume_group_name,
2564
      "file_storage_dir": cluster.file_storage_dir,
2565
      }
2566

    
2567
    return result
2568

    
2569

    
2570
class LUQueryConfigValues(NoHooksLU):
2571
  """Return configuration values.
2572

2573
  """
2574
  _OP_REQP = []
2575
  REQ_BGL = False
2576
  _FIELDS_DYNAMIC = utils.FieldSet()
2577
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2578

    
2579
  def ExpandNames(self):
2580
    self.needed_locks = {}
2581

    
2582
    _CheckOutputFields(static=self._FIELDS_STATIC,
2583
                       dynamic=self._FIELDS_DYNAMIC,
2584
                       selected=self.op.output_fields)
2585

    
2586
  def CheckPrereq(self):
2587
    """No prerequisites.
2588

2589
    """
2590
    pass
2591

    
2592
  def Exec(self, feedback_fn):
2593
    """Dump a representation of the cluster config to the standard output.
2594

2595
    """
2596
    values = []
2597
    for field in self.op.output_fields:
2598
      if field == "cluster_name":
2599
        entry = self.cfg.GetClusterName()
2600
      elif field == "master_node":
2601
        entry = self.cfg.GetMasterNode()
2602
      elif field == "drain_flag":
2603
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2604
      else:
2605
        raise errors.ParameterError(field)
2606
      values.append(entry)
2607
    return values
2608

    
2609
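# Illustrative sketch (not part of the original module): LUQueryConfigValues
# returns one entry per requested field, in the order given; the opcode
# constructor and the sample result are assumptions.
#
#   OpQueryConfigValues(output_fields=["cluster_name", "drain_flag"])
#   # -> ["cluster.example.com", False]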

    
2610
class LUActivateInstanceDisks(NoHooksLU):
2611
  """Bring up an instance's disks.
2612

2613
  """
2614
  _OP_REQP = ["instance_name"]
2615
  REQ_BGL = False
2616

    
2617
  def ExpandNames(self):
2618
    self._ExpandAndLockInstance()
2619
    self.needed_locks[locking.LEVEL_NODE] = []
2620
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2621

    
2622
  def DeclareLocks(self, level):
2623
    if level == locking.LEVEL_NODE:
2624
      self._LockInstancesNodes()
2625

    
2626
  def CheckPrereq(self):
2627
    """Check prerequisites.
2628

2629
    This checks that the instance is in the cluster.
2630

2631
    """
2632
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2633
    assert self.instance is not None, \
2634
      "Cannot retrieve locked instance %s" % self.op.instance_name
2635
    _CheckNodeOnline(self, self.instance.primary_node)
2636

    
2637
  def Exec(self, feedback_fn):
2638
    """Activate the disks.
2639

2640
    """
2641
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2642
    if not disks_ok:
2643
      raise errors.OpExecError("Cannot activate block devices")
2644

    
2645
    return disks_info
2646

    
2647

    
2648
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2649
  """Prepare the block devices for an instance.
2650

2651
  This sets up the block devices on all nodes.
2652

2653
  @type lu: L{LogicalUnit}
2654
  @param lu: the logical unit on whose behalf we execute
2655
  @type instance: L{objects.Instance}
2656
  @param instance: the instance for whose disks we assemble
2657
  @type ignore_secondaries: boolean
2658
  @param ignore_secondaries: if true, errors on secondary nodes
2659
      won't result in an error return from the function
2660
  @return: a tuple of (disks_ok, device_info); disks_ok is a boolean
2661
      denoting whether the assembly succeeded (failures on secondary
2662
      nodes are ignored if ignore_secondaries is set), and device_info
      is a list of (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices
2663

2664
  """
2665
  device_info = []
2666
  disks_ok = True
2667
  iname = instance.name
2668
  # With the two-pass mechanism we try to reduce the window of
2669
  # opportunity for the race condition of switching DRBD to primary
2670
  # before the handshake has occurred, but we do not eliminate it
2671

    
2672
  # The proper fix would be to wait (with some limits) until the
2673
  # connection has been made and drbd transitions from WFConnection
2674
  # into any other network-connected state (Connected, SyncTarget,
2675
  # SyncSource, etc.)
2676

    
2677
  # 1st pass, assemble on all nodes in secondary mode
2678
  for inst_disk in instance.disks:
2679
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2680
      lu.cfg.SetDiskID(node_disk, node)
2681
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2682
      msg = result.RemoteFailMsg()
2683
      if msg:
2684
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2685
                           " (is_primary=False, pass=1): %s",
2686
                           inst_disk.iv_name, node, msg)
2687
        if not ignore_secondaries:
2688
          disks_ok = False
2689

    
2690
  # FIXME: race condition on drbd migration to primary
2691

    
2692
  # 2nd pass, do only the primary node
2693
  for inst_disk in instance.disks:
2694
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2695
      if node != instance.primary_node:
2696
        continue
2697
      lu.cfg.SetDiskID(node_disk, node)
2698
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2699
      msg = result.RemoteFailMsg()
2700
      if msg:
2701
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2702
                           " (is_primary=True, pass=2): %s",
2703
                           inst_disk.iv_name, node, msg)
2704
        disks_ok = False
2705
    device_info.append((instance.primary_node, inst_disk.iv_name,
2706
                        result.payload))
2707

    
2708
  # leave the disks configured for the primary node
2709
  # this is a workaround that would be fixed better by
2710
  # improving the logical/physical id handling
2711
  for disk in instance.disks:
2712
    lu.cfg.SetDiskID(disk, instance.primary_node)
2713

    
2714
  return disks_ok, device_info
2715

    
2716

    
2717
def _StartInstanceDisks(lu, instance, force):
2718
  """Start the disks of an instance.
2719

2720
  """
2721
  disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
2722
                                           ignore_secondaries=force)
2723
  if not disks_ok:
2724
    _ShutdownInstanceDisks(lu, instance)
2725
    if force is not None and not force:
2726
      lu.proc.LogWarning("", hint="If the message above refers to a"
2727
                         " secondary node,"
2728
                         " you can retry the operation using '--force'.")
2729
    raise errors.OpExecError("Disk consistency error")
2730

    
2731

    
2732
class LUDeactivateInstanceDisks(NoHooksLU):
2733
  """Shutdown an instance's disks.
2734

2735
  """
2736
  _OP_REQP = ["instance_name"]
2737
  REQ_BGL = False
2738

    
2739
  def ExpandNames(self):
2740
    self._ExpandAndLockInstance()
2741
    self.needed_locks[locking.LEVEL_NODE] = []
2742
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2743

    
2744
  def DeclareLocks(self, level):
2745
    if level == locking.LEVEL_NODE:
2746
      self._LockInstancesNodes()
2747

    
2748
  def CheckPrereq(self):
2749
    """Check prerequisites.
2750

2751
    This checks that the instance is in the cluster.
2752

2753
    """
2754
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2755
    assert self.instance is not None, \
2756
      "Cannot retrieve locked instance %s" % self.op.instance_name
2757

    
2758
  def Exec(self, feedback_fn):
2759
    """Deactivate the disks
2760

2761
    """
2762
    instance = self.instance
2763
    _SafeShutdownInstanceDisks(self, instance)
2764

    
2765

    
2766
def _SafeShutdownInstanceDisks(lu, instance):
2767
  """Shutdown block devices of an instance.
2768

2769
  This function checks if an instance is running, before calling
2770
  _ShutdownInstanceDisks.
2771

2772
  """
2773
  ins_l = lu.rpc.call_instance_list([instance.primary_node],
2774
                                      [instance.hypervisor])
2775
  ins_l = ins_l[instance.primary_node]
2776
  if ins_l.failed or not isinstance(ins_l.data, list):
2777
    raise errors.OpExecError("Can't contact node '%s'" %
2778
                             instance.primary_node)
2779

    
2780
  if instance.name in ins_l.data:
2781
    raise errors.OpExecError("Instance is running, can't shutdown"
2782
                             " block devices.")
2783

    
2784
  _ShutdownInstanceDisks(lu, instance)
2785

    
2786

    
2787
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2788
  """Shutdown block devices of an instance.
2789

2790
  This does the shutdown on all nodes of the instance.
2791

2792
  If ignore_primary is true, errors on the primary node are
2793
  ignored; otherwise they mark the shutdown as failed.
2794

2795
  """
2796
  all_result = True
2797
  for disk in instance.disks:
2798
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2799
      lu.cfg.SetDiskID(top_disk, node)
2800
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
2801
      msg = result.RemoteFailMsg()
2802
      if msg:
2803
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
2804
                      disk.iv_name, node, msg)
2805
        if not ignore_primary or node != instance.primary_node:
2806
          all_result = False
2807
  return all_result
2808

    
2809
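# Illustrative sketch (not part of the original module): the effect of
# ignore_primary on the return value of _ShutdownInstanceDisks, assuming a
# hypothetical instance with one disk whose shutdown fails only on the
# primary node.
#
#   _ShutdownInstanceDisks(lu, instance)                       # -> False
#   _ShutdownInstanceDisks(lu, instance, ignore_primary=True)  # -> True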

    
2810
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2811
  """Checks if a node has enough free memory.
2812

2813
  This function checks if a given node has the needed amount of free
2814
  memory. In case the node has less memory or we cannot get the
2815
  information from the node, this function raises an OpPrereqError
2816
  exception.
2817

2818
  @type lu: C{LogicalUnit}
2819
  @param lu: a logical unit from which we get configuration data
2820
  @type node: C{str}
2821
  @param node: the node to check
2822
  @type reason: C{str}
2823
  @param reason: string to use in the error message
2824
  @type requested: C{int}
2825
  @param requested: the amount of memory in MiB to check for
2826
  @type hypervisor_name: C{str}
2827
  @param hypervisor_name: the hypervisor to ask for memory stats
2828
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2829
      we cannot check the node
2830

2831
  """
2832
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
2833
  nodeinfo[node].Raise()
2834
  free_mem = nodeinfo[node].data.get('memory_free')
2835
  if not isinstance(free_mem, int):
2836
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2837
                             " was '%s'" % (node, free_mem))
2838
  if requested > free_mem:
2839
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2840
                             " needed %s MiB, available %s MiB" %
2841
                             (node, reason, requested, free_mem))
2842

    
2843
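# Illustrative usage note (not part of the original module): a typical call
# to _CheckNodeFreeMemory, mirroring the one made by
# LUStartupInstance.CheckPrereq below; the variables come from that context.
# The call either returns silently or raises errors.OpPrereqError.
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)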

    
2844
class LUStartupInstance(LogicalUnit):
2845
  """Starts an instance.
2846

2847
  """
2848
  HPATH = "instance-start"
2849
  HTYPE = constants.HTYPE_INSTANCE
2850
  _OP_REQP = ["instance_name", "force"]
2851
  REQ_BGL = False
2852

    
2853
  def ExpandNames(self):
2854
    self._ExpandAndLockInstance()
2855

    
2856
  def BuildHooksEnv(self):
2857
    """Build hooks env.
2858

2859
    This runs on master, primary and secondary nodes of the instance.
2860

2861
    """
2862
    env = {
2863
      "FORCE": self.op.force,
2864
      }
2865
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2866
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2867
    return env, nl, nl
2868

    
2869
  def CheckPrereq(self):
2870
    """Check prerequisites.
2871

2872
    This checks that the instance is in the cluster.
2873

2874
    """
2875
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2876
    assert self.instance is not None, \
2877
      "Cannot retrieve locked instance %s" % self.op.instance_name
2878

    
2879
    # extra beparams
2880
    self.beparams = getattr(self.op, "beparams", {})
2881
    if self.beparams:
2882
      if not isinstance(self.beparams, dict):
2883
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
2884
                                   " dict" % (type(self.beparams), ))
2885
      # fill the beparams dict
2886
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
2887
      self.op.beparams = self.beparams
2888

    
2889
    # extra hvparams
2890
    self.hvparams = getattr(self.op, "hvparams", {})
2891
    if self.hvparams:
2892
      if not isinstance(self.hvparams, dict):
2893
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
2894
                                   " dict" % (type(self.hvparams), ))
2895

    
2896
      # check hypervisor parameter syntax (locally)
2897
      cluster = self.cfg.GetClusterInfo()
2898
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
2899
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
2900
                                    instance.hvparams)
2901
      filled_hvp.update(self.hvparams)
2902
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
2903
      hv_type.CheckParameterSyntax(filled_hvp)
2904
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
2905
      self.op.hvparams = self.hvparams
2906

    
2907
    _CheckNodeOnline(self, instance.primary_node)
2908

    
2909
    bep = self.cfg.GetClusterInfo().FillBE(instance)
2910
    # check bridges existence
2911
    _CheckInstanceBridgesExist(self, instance)
2912

    
2913
    remote_info = self.rpc.call_instance_info(instance.primary_node,
2914
                                              instance.name,
2915
                                              instance.hypervisor)
2916
    remote_info.Raise()
2917
    if not remote_info.data:
2918
      _CheckNodeFreeMemory(self, instance.primary_node,
2919
                           "starting instance %s" % instance.name,
2920
                           bep[constants.BE_MEMORY], instance.hypervisor)
2921

    
2922
  def Exec(self, feedback_fn):
2923
    """Start the instance.
2924

2925
    """
2926
    instance = self.instance
2927
    force = self.op.force
2928

    
2929
    self.cfg.MarkInstanceUp(instance.name)
2930

    
2931
    node_current = instance.primary_node
2932

    
2933
    _StartInstanceDisks(self, instance, force)
2934

    
2935
    result = self.rpc.call_instance_start(node_current, instance,
2936
                                          self.hvparams, self.beparams)
2937
    msg = result.RemoteFailMsg()
2938
    if msg:
2939
      _ShutdownInstanceDisks(self, instance)
2940
      raise errors.OpExecError("Could not start instance: %s" % msg)
2941

    
2942
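# Illustrative sketch, not part of the original code: the hvparams handling
# in LUStartupInstance.CheckPrereq above applies cluster defaults first,
# then the instance's own hvparams, then any per-start overrides passed in
# the opcode, and finally validates the merged result.
def _ExampleFilledHvParams(lu, instance, start_hvparams):
  """Hypothetical helper mirroring the hvparams merge done above."""
  cluster = lu.cfg.GetClusterInfo()
  filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                instance.hvparams)
  filled_hvp.update(start_hvparams)
  hv_type = hypervisor.GetHypervisor(instance.hypervisor)
  hv_type.CheckParameterSyntax(filled_hvp)
  return filled_hvp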

    
2943
class LURebootInstance(LogicalUnit):
2944
  """Reboot an instance.
2945

2946
  """
2947
  HPATH = "instance-reboot"
2948
  HTYPE = constants.HTYPE_INSTANCE
2949
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
2950
  REQ_BGL = False
2951

    
2952
  def ExpandNames(self):
2953
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
2954
                                   constants.INSTANCE_REBOOT_HARD,
2955
                                   constants.INSTANCE_REBOOT_FULL]:
2956
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
2957
                                  (constants.INSTANCE_REBOOT_SOFT,
2958
                                   constants.INSTANCE_REBOOT_HARD,
2959
                                   constants.INSTANCE_REBOOT_FULL))
2960
    self._ExpandAndLockInstance()
2961

    
2962
  def BuildHooksEnv(self):
2963
    """Build hooks env.
2964

2965
    This runs on master, primary and secondary nodes of the instance.
2966

2967
    """
2968
    env = {
2969
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
2970
      "REBOOT_TYPE": self.op.reboot_type,
2971
      }
2972
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2973
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2974
    return env, nl, nl
2975

    
2976
  def CheckPrereq(self):
2977
    """Check prerequisites.
2978

2979
    This checks that the instance is in the cluster.
2980

2981
    """
2982
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2983
    assert self.instance is not None, \
2984
      "Cannot retrieve locked instance %s" % self.op.instance_name
2985

    
2986
    _CheckNodeOnline(self, instance.primary_node)
2987

    
2988
    # check bridges existence
2989
    _CheckInstanceBridgesExist(self, instance)
2990

    
2991
  def Exec(self, feedback_fn):
2992
    """Reboot the instance.
2993

2994
    """
2995
    instance = self.instance
2996
    ignore_secondaries = self.op.ignore_secondaries
2997
    reboot_type = self.op.reboot_type
2998

    
2999
    node_current = instance.primary_node
3000

    
3001
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3002
                       constants.INSTANCE_REBOOT_HARD]:
3003
      for disk in instance.disks:
3004
        self.cfg.SetDiskID(disk, node_current)
3005
      result = self.rpc.call_instance_reboot(node_current, instance,
3006
                                             reboot_type)
3007
      msg = result.RemoteFailMsg()
3008
      if msg:
3009
        raise errors.OpExecError("Could not reboot instance: %s" % msg)
3010
    else:
3011
      result = self.rpc.call_instance_shutdown(node_current, instance)
3012
      msg = result.RemoteFailMsg()
3013
      if msg:
3014
        raise errors.OpExecError("Could not shutdown instance for"
3015
                                 " full reboot: %s" % msg)
3016
      _ShutdownInstanceDisks(self, instance)
3017
      _StartInstanceDisks(self, instance, ignore_secondaries)
3018
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3019
      msg = result.RemoteFailMsg()
3020
      if msg:
3021
        _ShutdownInstanceDisks(self, instance)
3022
        raise errors.OpExecError("Could not start instance for"
3023
                                 " full reboot: %s" % msg)
3024

    
3025
    self.cfg.MarkInstanceUp(instance.name)
3026

    
3027
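# Illustrative sketch, not part of the original code: the three reboot types
# accepted by LURebootInstance above; soft and hard reboots go through the
# hypervisor's reboot call, while a full reboot is a shutdown, disk restart
# and fresh start, as implemented in Exec().
_EXAMPLE_REBOOT_TYPES = [constants.INSTANCE_REBOOT_SOFT,
                         constants.INSTANCE_REBOOT_HARD,
                         constants.INSTANCE_REBOOT_FULL]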

    
3028
class LUShutdownInstance(LogicalUnit):
3029
  """Shutdown an instance.
3030

3031
  """
3032
  HPATH = "instance-stop"
3033
  HTYPE = constants.HTYPE_INSTANCE
3034
  _OP_REQP = ["instance_name"]
3035
  REQ_BGL = False
3036

    
3037
  def ExpandNames(self):
3038
    self._ExpandAndLockInstance()
3039

    
3040
  def BuildHooksEnv(self):
3041
    """Build hooks env.
3042

3043
    This runs on master, primary and secondary nodes of the instance.
3044

3045
    """
3046
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3047
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3048
    return env, nl, nl
3049

    
3050
  def CheckPrereq(self):
3051
    """Check prerequisites.
3052

3053
    This checks that the instance is in the cluster.
3054

3055
    """
3056
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3057
    assert self.instance is not None, \
3058
      "Cannot retrieve locked instance %s" % self.op.instance_name
3059
    _CheckNodeOnline(self, self.instance.primary_node)
3060

    
3061
  def Exec(self, feedback_fn):
3062
    """Shutdown the instance.
3063

3064
    """
3065
    instance = self.instance
3066
    node_current = instance.primary_node
3067
    self.cfg.MarkInstanceDown(instance.name)
3068
    result = self.rpc.call_instance_shutdown(node_current, instance)
3069
    msg = result.RemoteFailMsg()
3070
    if msg:
3071
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3072

    
3073
    _ShutdownInstanceDisks(self, instance)
3074

    
3075

    
3076
class LUReinstallInstance(LogicalUnit):
3077
  """Reinstall an instance.
3078

3079
  """
3080
  HPATH = "instance-reinstall"
3081
  HTYPE = constants.HTYPE_INSTANCE
3082
  _OP_REQP = ["instance_name"]
3083
  REQ_BGL = False
3084

    
3085
  def ExpandNames(self):
3086
    self._ExpandAndLockInstance()
3087

    
3088
  def BuildHooksEnv(self):
3089
    """Build hooks env.
3090

3091
    This runs on master, primary and secondary nodes of the instance.
3092

3093
    """
3094
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3095
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3096
    return env, nl, nl
3097

    
3098
  def CheckPrereq(self):
3099
    """Check prerequisites.
3100

3101
    This checks that the instance is in the cluster and is not running.
3102

3103
    """
3104
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3105
    assert instance is not None, \
3106
      "Cannot retrieve locked instance %s" % self.op.instance_name
3107
    _CheckNodeOnline(self, instance.primary_node)
3108

    
3109
    if instance.disk_template == constants.DT_DISKLESS:
3110
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3111
                                 self.op.instance_name)
3112
    if instance.admin_up:
3113
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3114
                                 self.op.instance_name)
3115
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3116
                                              instance.name,
3117
                                              instance.hypervisor)
3118
    remote_info.Raise()
3119
    if remote_info.data:
3120
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3121
                                 (self.op.instance_name,
3122
                                  instance.primary_node))
3123

    
3124
    self.op.os_type = getattr(self.op, "os_type", None)
3125
    if self.op.os_type is not None:
3126
      # OS verification
3127
      pnode = self.cfg.GetNodeInfo(
3128
        self.cfg.ExpandNodeName(instance.primary_node))
3129
      if pnode is None:
3130
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3131
                                   self.op.pnode)
3132
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3133
      result.Raise()
3134
      if not isinstance(result.data, objects.OS):
3135
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
3136
                                   " primary node"  % self.op.os_type)
3137

    
3138
    self.instance = instance
3139

    
3140
  def Exec(self, feedback_fn):
3141
    """Reinstall the instance.
3142

3143
    """
3144
    inst = self.instance
3145

    
3146
    if self.op.os_type is not None:
3147
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3148
      inst.os = self.op.os_type
3149
      self.cfg.Update(inst)
3150

    
3151
    _StartInstanceDisks(self, inst, None)
3152
    try:
3153
      feedback_fn("Running the instance OS create scripts...")
3154
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3155
      msg = result.RemoteFailMsg()
3156
      if msg:
3157
        raise errors.OpExecError("Could not install OS for instance %s"
3158
                                 " on node %s: %s" %
3159
                                 (inst.name, inst.primary_node, msg))
3160
    finally:
3161
      _ShutdownInstanceDisks(self, inst)
3162

    
3163

    
3164
class LURenameInstance(LogicalUnit):
3165
  """Rename an instance.
3166

3167
  """
3168
  HPATH = "instance-rename"
3169
  HTYPE = constants.HTYPE_INSTANCE
3170
  _OP_REQP = ["instance_name", "new_name"]
3171

    
3172
  def BuildHooksEnv(self):
3173
    """Build hooks env.
3174

3175
    This runs on master, primary and secondary nodes of the instance.
3176

3177
    """
3178
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3179
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3180
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3181
    return env, nl, nl
3182

    
3183
  def CheckPrereq(self):
3184
    """Check prerequisites.
3185

3186
    This checks that the instance is in the cluster and is not running.
3187

3188
    """
3189
    instance = self.cfg.GetInstanceInfo(
3190
      self.cfg.ExpandInstanceName(self.op.instance_name))
3191
    if instance is None:
3192
      raise errors.OpPrereqError("Instance '%s' not known" %
3193
                                 self.op.instance_name)
3194
    _CheckNodeOnline(self, instance.primary_node)
3195

    
3196
    if instance.admin_up:
3197
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3198
                                 self.op.instance_name)
3199
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3200
                                              instance.name,
3201
                                              instance.hypervisor)
3202
    remote_info.Raise()
3203
    if remote_info.data:
3204
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3205
                                 (self.op.instance_name,
3206
                                  instance.primary_node))
3207
    self.instance = instance
3208

    
3209
    # new name verification
3210
    name_info = utils.HostInfo(self.op.new_name)
3211

    
3212
    self.op.new_name = new_name = name_info.name
3213
    instance_list = self.cfg.GetInstanceList()
3214
    if new_name in instance_list:
3215
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3216
                                 new_name)
3217

    
3218
    if not getattr(self.op, "ignore_ip", False):
3219
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3220
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3221
                                   (name_info.ip, new_name))
3222

    
3223

    
3224
  def Exec(self, feedback_fn):
3225
    """Reinstall the instance.
3226

3227
    """
3228
    inst = self.instance
3229
    old_name = inst.name
3230

    
3231
    if inst.disk_template == constants.DT_FILE:
3232
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3233

    
3234
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3235
    # Change the instance lock. This is definitely safe while we hold the BGL
3236
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3237
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3238

    
3239
    # re-read the instance from the configuration after rename
3240
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3241

    
3242
    if inst.disk_template == constants.DT_FILE:
3243
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3244
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3245
                                                     old_file_storage_dir,
3246
                                                     new_file_storage_dir)
3247
      result.Raise()
3248
      if not result.data:
3249
        raise errors.OpExecError("Could not connect to node '%s' to rename"
3250
                                 " directory '%s' to '%s' (but the instance"
3251
                                 " has been renamed in Ganeti)" % (
3252
                                 inst.primary_node, old_file_storage_dir,
3253
                                 new_file_storage_dir))
3254

    
3255
      if not result.data[0]:
3256
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
3257
                                 " (but the instance has been renamed in"
3258
                                 " Ganeti)" % (old_file_storage_dir,
3259
                                               new_file_storage_dir))
3260

    
3261
    _StartInstanceDisks(self, inst, None)
3262
    try:
3263
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3264
                                                 old_name)
3265
      msg = result.RemoteFailMsg()
3266
      if msg:
3267
        msg = ("Could not run OS rename script for instance %s on node %s"
3268
               " (but the instance has been renamed in Ganeti): %s" %
3269
               (inst.name, inst.primary_node, msg))
3270
        self.proc.LogWarning(msg)
3271
    finally:
3272
      _ShutdownInstanceDisks(self, inst)
3273

    
3274

    
3275
class LURemoveInstance(LogicalUnit):
3276
  """Remove an instance.
3277

3278
  """
3279
  HPATH = "instance-remove"
3280
  HTYPE = constants.HTYPE_INSTANCE
3281
  _OP_REQP = ["instance_name", "ignore_failures"]
3282
  REQ_BGL = False
3283

    
3284
  def ExpandNames(self):
3285
    self._ExpandAndLockInstance()
3286
    self.needed_locks[locking.LEVEL_NODE] = []
3287
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3288

    
3289
  def DeclareLocks(self, level):
3290
    if level == locking.LEVEL_NODE:
3291
      self._LockInstancesNodes()
3292

    
3293
  def BuildHooksEnv(self):
3294
    """Build hooks env.
3295

3296
    This runs on master, primary and secondary nodes of the instance.
3297

3298
    """
3299
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3300
    nl = [self.cfg.GetMasterNode()]
3301
    return env, nl, nl
3302

    
3303
  def CheckPrereq(self):
3304
    """Check prerequisites.
3305

3306
    This checks that the instance is in the cluster.
3307

3308
    """
3309
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3310
    assert self.instance is not None, \
3311
      "Cannot retrieve locked instance %s" % self.op.instance_name
3312

    
3313
  def Exec(self, feedback_fn):
3314
    """Remove the instance.
3315

3316
    """
3317
    instance = self.instance
3318
    logging.info("Shutting down instance %s on node %s",
3319
                 instance.name, instance.primary_node)
3320

    
3321
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3322
    msg = result.RemoteFailMsg()
3323
    if msg:
3324
      if self.op.ignore_failures:
3325
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3326
      else:
3327
        raise errors.OpExecError("Could not shutdown instance %s on"
3328
                                 " node %s: %s" %
3329
                                 (instance.name, instance.primary_node, msg))
3330

    
3331
    logging.info("Removing block devices for instance %s", instance.name)
3332

    
3333
    if not _RemoveDisks(self, instance):
3334
      if self.op.ignore_failures:
3335
        feedback_fn("Warning: can't remove instance's disks")
3336
      else:
3337
        raise errors.OpExecError("Can't remove instance's disks")
3338

    
3339
    logging.info("Removing instance %s out of cluster config", instance.name)
3340

    
3341
    self.cfg.RemoveInstance(instance.name)
3342
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3343

    
3344

    
3345
class LUQueryInstances(NoHooksLU):
3346
  """Logical unit for querying instances.
3347

3348
  """
3349
  _OP_REQP = ["output_fields", "names", "use_locking"]
3350
  REQ_BGL = False
3351
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3352
                                    "admin_state",
3353
                                    "disk_template", "ip", "mac", "bridge",
3354
                                    "sda_size", "sdb_size", "vcpus", "tags",
3355
                                    "network_port", "beparams",
3356
                                    r"(disk)\.(size)/([0-9]+)",
3357
                                    r"(disk)\.(sizes)", "disk_usage",
3358
                                    r"(nic)\.(mac|ip|bridge)/([0-9]+)",
3359
                                    r"(nic)\.(macs|ips|bridges)",
3360
                                    r"(disk|nic)\.(count)",
3361
                                    "serial_no", "hypervisor", "hvparams",] +
3362
                                  ["hv/%s" % name
3363
                                   for name in constants.HVS_PARAMETERS] +
3364
                                  ["be/%s" % name
3365
                                   for name in constants.BES_PARAMETERS])
3366
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3367

    
3368

    
3369
  def ExpandNames(self):
3370
    _CheckOutputFields(static=self._FIELDS_STATIC,
3371
                       dynamic=self._FIELDS_DYNAMIC,
3372
                       selected=self.op.output_fields)
3373

    
3374
    self.needed_locks = {}
3375
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3376
    self.share_locks[locking.LEVEL_NODE] = 1
3377

    
3378
    if self.op.names:
3379
      self.wanted = _GetWantedInstances(self, self.op.names)
3380
    else:
3381
      self.wanted = locking.ALL_SET
3382

    
3383
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3384
    self.do_locking = self.do_node_query and self.op.use_locking
3385
    if self.do_locking:
3386
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3387
      self.needed_locks[locking.LEVEL_NODE] = []
3388
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3389

    
3390
  def DeclareLocks(self, level):
3391
    if level == locking.LEVEL_NODE and self.do_locking:
3392
      self._LockInstancesNodes()
3393

    
3394
  def CheckPrereq(self):
3395
    """Check prerequisites.
3396

3397
    """
3398
    pass
3399

    
3400
  def Exec(self, feedback_fn):
3401
    """Computes the list of nodes and their attributes.
3402

3403
    """
3404
    all_info = self.cfg.GetAllInstancesInfo()
3405
    if self.wanted == locking.ALL_SET:
3406
      # caller didn't specify instance names, so ordering is not important
3407
      if self.do_locking:
3408
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3409
      else:
3410
        instance_names = all_info.keys()
3411
      instance_names = utils.NiceSort(instance_names)
3412
    else:
3413
      # caller did specify names, so we must keep the ordering
3414
      if self.do_locking:
3415
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3416
      else:
3417
        tgt_set = all_info.keys()
3418
      missing = set(self.wanted).difference(tgt_set)
3419
      if missing:
3420
        raise errors.OpExecError("Some instances were removed before"
3421
                                 " retrieving their data: %s" % missing)
3422
      instance_names = self.wanted
3423

    
3424
    instance_list = [all_info[iname] for iname in instance_names]
3425

    
3426
    # begin data gathering
3427

    
3428
    nodes = frozenset([inst.primary_node for inst in instance_list])
3429
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3430

    
3431
    bad_nodes = []
3432
    off_nodes = []
3433
    if self.do_node_query:
3434
      live_data = {}
3435
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3436
      for name in nodes:
3437
        result = node_data[name]
3438
        if result.offline:
3439
          # offline nodes will be in both lists
3440
          off_nodes.append(name)
3441
        if result.failed:
3442
          bad_nodes.append(name)
3443
        else:
3444
          if result.data:
3445
            live_data.update(result.data)
3446
            # else no instance is alive
3447
    else:
3448
      live_data = dict([(name, {}) for name in instance_names])
3449

    
3450
    # end data gathering
3451

    
3452
    HVPREFIX = "hv/"
3453
    BEPREFIX = "be/"
3454
    output = []
3455
    for instance in instance_list:
3456
      iout = []
3457
      i_hv = self.cfg.GetClusterInfo().FillHV(instance)
3458
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
3459
      for field in self.op.output_fields:
3460
        st_match = self._FIELDS_STATIC.Matches(field)
3461
        if field == "name":
3462
          val = instance.name
3463
        elif field == "os":
3464
          val = instance.os
3465
        elif field == "pnode":
3466
          val = instance.primary_node
3467
        elif field == "snodes":
3468
          val = list(instance.secondary_nodes)
3469
        elif field == "admin_state":
3470
          val = instance.admin_up
3471
        elif field == "oper_state":
3472
          if instance.primary_node in bad_nodes:
3473
            val = None
3474
          else:
3475
            val = bool(live_data.get(instance.name))
3476
        elif field == "status":
3477
          if instance.primary_node in off_nodes:
3478
            val = "ERROR_nodeoffline"
3479
          elif instance.primary_node in bad_nodes:
3480
            val = "ERROR_nodedown"
3481
          else:
3482
            running = bool(live_data.get(instance.name))
3483
            if running:
3484
              if instance.admin_up:
3485
                val = "running"
3486
              else:
3487
                val = "ERROR_up"
3488
            else:
3489
              if instance.admin_up:
3490
                val = "ERROR_down"
3491
              else:
3492
                val = "ADMIN_down"
3493
        elif field == "oper_ram":
3494
          if instance.primary_node in bad_nodes:
3495
            val = None
3496
          elif instance.name in live_data:
3497
            val = live_data[instance.name].get("memory", "?")
3498
          else:
3499
            val = "-"
3500
        elif field == "disk_template":
3501
          val = instance.disk_template
3502
        elif field == "ip":
3503
          val = instance.nics[0].ip
3504
        elif field == "bridge":
3505
          val = instance.nics[0].bridge
3506
        elif field == "mac":
3507
          val = instance.nics[0].mac
3508
        elif field == "sda_size" or field == "sdb_size":
3509
          idx = ord(field[2]) - ord('a')
3510
          try:
3511
            val = instance.FindDisk(idx).size
3512
          except errors.OpPrereqError:
3513
            val = None
3514
        elif field == "disk_usage": # total disk usage per node
3515
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
3516
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3517
        elif field == "tags":
3518
          val = list(instance.GetTags())
3519
        elif field == "serial_no":
3520
          val = instance.serial_no
3521
        elif field == "network_port":
3522
          val = instance.network_port
3523
        elif field == "hypervisor":
3524
          val = instance.hypervisor
3525
        elif field == "hvparams":
3526
          val = i_hv
3527
        elif (field.startswith(HVPREFIX) and
3528
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3529
          val = i_hv.get(field[len(HVPREFIX):], None)
3530
        elif field == "beparams":
3531
          val = i_be
3532
        elif (field.startswith(BEPREFIX) and
3533
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3534
          val = i_be.get(field[len(BEPREFIX):], None)
3535
        elif st_match and st_match.groups():
3536
          # matches a variable list
3537
          st_groups = st_match.groups()
3538
          if st_groups and st_groups[0] == "disk":
3539
            if st_groups[1] == "count":
3540
              val = len(instance.disks)
3541
            elif st_groups[1] == "sizes":
3542
              val = [disk.size for disk in instance.disks]
3543
            elif st_groups[1] == "size":
3544
              try:
3545
                val = instance.FindDisk(st_groups[2]).size
3546
              except errors.OpPrereqError:
3547
                val = None
3548
            else:
3549
              assert False, "Unhandled disk parameter"
3550
          elif st_groups[0] == "nic":
3551
            if st_groups[1] == "count":
3552
              val = len(instance.nics)
3553
            elif st_groups[1] == "macs":
3554
              val = [nic.mac for nic in instance.nics]
3555
            elif st_groups[1] == "ips":
3556
              val = [nic.ip for nic in instance.nics]
3557
            elif st_groups[1] == "bridges":
3558
              val = [nic.bridge for nic in instance.nics]
3559
            else:
3560
              # index-based item
3561
              nic_idx = int(st_groups[2])
3562
              if nic_idx >= len(instance.nics):
3563
                val = None
3564
              else:
3565
                if st_groups[1] == "mac":
3566
                  val = instance.nics[nic_idx].mac
3567
                elif st_groups[1] == "ip":
3568
                  val = instance.nics[nic_idx].ip
3569
                elif st_groups[1] == "bridge":
3570
                  val = instance.nics[nic_idx].bridge
3571
                else:
3572
                  assert False, "Unhandled NIC parameter"
3573
          else:
3574
            assert False, "Unhandled variable parameter"
3575
        else:
3576
          raise errors.ParameterError(field)
3577
        iout.append(val)
3578
      output.append(iout)
3579

    
3580
    return output
3581

    
3582
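# Illustrative sketch, not part of the original code: a few of the field
# names accepted by LUQueryInstances, taken from the static and dynamic
# field sets above (indexed variants use the "<kind>.<item>/<index>" form).
def _ExampleInstanceQueryFields():
  """Hypothetical helper listing some valid output_fields values."""
  return ["name", "os", "pnode", "snodes", "status", "oper_ram",
          "disk.count", "disk.sizes", "disk.size/0",
          "nic.count", "nic.macs", "nic.mac/0"]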

    
3583
class LUFailoverInstance(LogicalUnit):
3584
  """Failover an instance.
3585

3586
  """
3587
  HPATH = "instance-failover"
3588
  HTYPE = constants.HTYPE_INSTANCE
3589
  _OP_REQP = ["instance_name", "ignore_consistency"]
3590
  REQ_BGL = False
3591

    
3592
  def ExpandNames(self):
3593
    self._ExpandAndLockInstance()
3594
    self.needed_locks[locking.LEVEL_NODE] = []
3595
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3596

    
3597
  def DeclareLocks(self, level):
3598
    if level == locking.LEVEL_NODE:
3599
      self._LockInstancesNodes()
3600

    
3601
  def BuildHooksEnv(self):
3602
    """Build hooks env.
3603

3604
    This runs on master, primary and secondary nodes of the instance.
3605

3606
    """
3607
    env = {
3608
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3609
      }
3610
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3611
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3612
    return env, nl, nl
3613

    
3614
  def CheckPrereq(self):
3615
    """Check prerequisites.
3616

3617
    This checks that the instance is in the cluster.
3618

3619
    """
3620
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3621
    assert self.instance is not None, \
3622
      "Cannot retrieve locked instance %s" % self.op.instance_name
3623

    
3624
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3625
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3626
      raise errors.OpPrereqError("Instance's disk layout is not"
3627
                                 " network mirrored, cannot failover.")
3628

    
3629
    secondary_nodes = instance.secondary_nodes
3630
    if not secondary_nodes:
3631
      raise errors.ProgrammerError("no secondary node but using "
3632
                                   "a mirrored disk template")
3633

    
3634
    target_node = secondary_nodes[0]
3635
    _CheckNodeOnline(self, target_node)
3636
    _CheckNodeNotDrained(self, target_node)
3637
    # check memory requirements on the secondary node
3638
    _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
3639
                         instance.name, bep[constants.BE_MEMORY],
3640
                         instance.hypervisor)
3641
    # check bridge existence
3642
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3643

    
3644
  def Exec(self, feedback_fn):
3645
    """Failover an instance.
3646

3647
    The failover is done by shutting it down on its present node and
3648
    starting it on the secondary.
3649

3650
    """
3651
    instance = self.instance
3652

    
3653
    source_node = instance.primary_node
3654
    target_node = instance.secondary_nodes[0]
3655

    
3656
    feedback_fn("* checking disk consistency between source and target")
3657
    for dev in instance.disks:
3658
      # for drbd, these are drbd over lvm
3659
      if not _CheckDiskConsistency(self, dev, target_node, False):
3660
        if instance.admin_up and not self.op.ignore_consistency:
3661
          raise errors.OpExecError("Disk %s is degraded on target node,"
3662
                                   " aborting failover." % dev.iv_name)
3663

    
3664
    feedback_fn("* shutting down instance on source node")
3665
    logging.info("Shutting down instance %s on node %s",
3666
                 instance.name, source_node)
3667

    
3668
    result = self.rpc.call_instance_shutdown(source_node, instance)
3669
    msg = result.RemoteFailMsg()
3670
    if msg:
3671
      if self.op.ignore_consistency:
3672
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
3673
                             " Proceeding anyway. Please make sure node"
3674
                             " %s is down. Error details: %s",
3675
                             instance.name, source_node, source_node, msg)
3676
      else:
3677
        raise errors.OpExecError("Could not shutdown instance %s on"
3678
                                 " node %s: %s" %
3679
                                 (instance.name, source_node, msg))
3680

    
3681
    feedback_fn("* deactivating the instance's disks on source node")
3682
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
3683
      raise errors.OpExecError("Can't shut down the instance's disks.")
3684

    
3685
    instance.primary_node = target_node
3686
    # distribute new instance config to the other nodes
3687
    self.cfg.Update(instance)
3688

    
3689
    # Only start the instance if it's marked as up
3690
    if instance.admin_up:
3691
      feedback_fn("* activating the instance's disks on target node")
3692
      logging.info("Starting instance %s on node %s",
3693
                   instance.name, target_node)
3694

    
3695
      disks_ok, dummy = _AssembleInstanceDisks(self, instance,
3696
                                               ignore_secondaries=True)
3697
      if not disks_ok:
3698
        _ShutdownInstanceDisks(self, instance)
3699
        raise errors.OpExecError("Can't activate the instance's disks")
3700

    
3701
      feedback_fn("* starting the instance on the target node")
3702
      result = self.rpc.call_instance_start(target_node, instance, None, None)
3703
      msg = result.RemoteFailMsg()
3704
      if msg:
3705
        _ShutdownInstanceDisks(self, instance)
3706
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
3707
                                 (instance.name, target_node, msg))
3708

    
3709
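# Illustrative sketch, not part of the original code: assuming the usual
# opcodes.OpFailoverInstance counterpart of this LU, a failover request only
# carries the two parameters listed in _OP_REQP above.
def _ExampleFailoverOpcode(instance_name):
  """Hypothetical helper building a failover opcode (opcode name assumed)."""
  return opcodes.OpFailoverInstance(instance_name=instance_name,
                                    ignore_consistency=False)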

    
3710
class LUMigrateInstance(LogicalUnit):
3711
  """Migrate an instance.
3712

3713
  This is migration without shutting down, compared to the failover,
3714
  which is done with shutdown.
3715

3716
  """
3717
  HPATH = "instance-migrate"
3718
  HTYPE = constants.HTYPE_INSTANCE
3719
  _OP_REQP = ["instance_name", "live", "cleanup"]
3720

    
3721
  REQ_BGL = False
3722

    
3723
  def ExpandNames(self):
3724
    self._ExpandAndLockInstance()
3725
    self.needed_locks[locking.LEVEL_NODE] = []
3726
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3727

    
3728
  def DeclareLocks(self, level):
3729
    if level == locking.LEVEL_NODE:
3730
      self._LockInstancesNodes()
3731

    
3732
  def BuildHooksEnv(self):
3733
    """Build hooks env.
3734

3735
    This runs on master, primary and secondary nodes of the instance.
3736

3737
    """
3738
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3739
    env["MIGRATE_LIVE"] = self.op.live
3740
    env["MIGRATE_CLEANUP"] = self.op.cleanup
3741
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3742
    return env, nl, nl
3743

    
3744
  def CheckPrereq(self):
3745
    """Check prerequisites.
3746

3747
    This checks that the instance is in the cluster.
3748

3749
    """
3750
    instance = self.cfg.GetInstanceInfo(
3751
      self.cfg.ExpandInstanceName(self.op.instance_name))
3752
    if instance is None:
3753
      raise errors.OpPrereqError("Instance '%s' not known" %
3754
                                 self.op.instance_name)
3755

    
3756
    if instance.disk_template != constants.DT_DRBD8:
3757
      raise errors.OpPrereqError("Instance's disk layout is not"
3758
                                 " drbd8, cannot migrate.")
3759

    
3760
    secondary_nodes = instance.secondary_nodes
3761
    if not secondary_nodes:
3762
      raise errors.ConfigurationError("No secondary node but using"
3763
                                      " drbd8 disk template")
3764

    
3765
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
3766

    
3767
    target_node = secondary_nodes[0]
3768
    # check memory requirements on the secondary node
3769
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
3770
                         instance.name, i_be[constants.BE_MEMORY],
3771
                         instance.hypervisor)
3772

    
3773
    # check bridge existence
3774
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3775

    
3776
    if not self.op.cleanup:
3777
      _CheckNodeNotDrained(self, target_node)
3778
      result = self.rpc.call_instance_migratable(instance.primary_node,
3779
                                                 instance)
3780
      msg = result.RemoteFailMsg()
3781
      if msg:
3782
        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
3783
                                   msg)
3784

    
3785
    self.instance = instance
3786

    
3787
  def _WaitUntilSync(self):
3788
    """Poll with custom rpc for disk sync.
3789

3790
    This uses our own step-based rpc call.
3791

3792
    """
3793
    self.feedback_fn("* wait until resync is done")
3794
    all_done = False
3795
    while not all_done:
3796
      all_done = True
3797
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
3798
                                            self.nodes_ip,
3799
                                            self.instance.disks)
3800
      min_percent = 100
3801
      for node, nres in result.items():
3802
        msg = nres.RemoteFailMsg()
3803
        if msg:
3804
          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
3805
                                   (node, msg))
3806
        node_done, node_percent = nres.payload
3807
        all_done = all_done and node_done
3808
        if node_percent is not None:
3809
          min_percent = min(min_percent, node_percent)
3810
      if not all_done:
3811
        if min_percent < 100:
3812
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
3813
        time.sleep(2)
3814

    
3815
  def _EnsureSecondary(self, node):
3816
    """Demote a node to secondary.
3817

3818
    """
3819
    self.feedback_fn("* switching node %s to secondary mode" % node)
3820

    
3821
    for dev in self.instance.disks:
3822
      self.cfg.SetDiskID(dev, node)
3823

    
3824
    result = self.rpc.call_blockdev_close(node, self.instance.name,
3825
                                          self.instance.disks)
3826
    msg = result.RemoteFailMsg()
3827
    if msg:
3828
      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
3829
                               " error %s" % (node, msg))
3830

    
3831
  def _GoStandalone(self):
3832
    """Disconnect from the network.
3833

3834
    """
3835
    self.feedback_fn("* changing into standalone mode")
3836
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
3837
                                               self.instance.disks)
3838
    for node, nres in result.items():
3839
      msg = nres.RemoteFailMsg()
3840
      if msg:
3841
        raise errors.OpExecError("Cannot disconnect disks node %s,"
3842
                                 " error %s" % (node, msg))
3843

    
3844
  def _GoReconnect(self, multimaster):
3845
    """Reconnect to the network.
3846

3847
    """
3848
    if multimaster:
3849
      msg = "dual-master"
3850
    else:
3851
      msg = "single-master"
3852
    self.feedback_fn("* changing disks into %s mode" % msg)
3853
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
3854
                                           self.instance.disks,
3855
                                           self.instance.name, multimaster)
3856
    for node, nres in result.items():
3857
      msg = nres.RemoteFailMsg()
3858
      if msg:
3859
        raise errors.OpExecError("Cannot change disks config on node %s,"
3860
                                 " error: %s" % (node, msg))
3861

    
3862
  def _ExecCleanup(self):
3863
    """Try to cleanup after a failed migration.
3864

3865
    The cleanup is done by:
3866
      - check that the instance is running only on one node
3867
        (and update the config if needed)
3868
      - change disks on its secondary node to secondary
3869
      - wait until disks are fully synchronized
3870
      - disconnect from the network
3871
      - change disks into single-master mode
3872
      - wait again until disks are fully synchronized
3873

3874
    """
3875
    instance = self.instance
3876
    target_node = self.target_node
3877
    source_node = self.source_node
3878

    
3879
    # check running on only one node
3880
    self.feedback_fn("* checking where the instance actually runs"
3881
                     " (if this hangs, the hypervisor might be in"
3882
                     " a bad state)")
3883
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
3884
    for node, result in ins_l.items():
3885
      result.Raise()
3886
      if not isinstance(result.data, list):
3887
        raise errors.OpExecError("Can't contact node '%s'" % node)
3888

    
3889
    runningon_source = instance.name in ins_l[source_node].data
3890
    runningon_target = instance.name in ins_l[target_node].data
3891

    
3892
    if runningon_source and runningon_target:
3893
      raise errors.OpExecError("Instance seems to be running on two nodes,"
3894
                               " or the hypervisor is confused. You will have"
3895
                               " to ensure manually that it runs only on one"
3896
                               " and restart this operation.")
3897

    
3898
    if not (runningon_source or runningon_target):
3899
      raise errors.OpExecError("Instance does not seem to be running at all."
3900
                               " In this case, it's safer to repair by"
3901
                               " running 'gnt-instance stop' to ensure disk"
3902
                               " shutdown, and then restarting it.")
3903

    
3904
    if runningon_target:
3905
      # the migration has actually succeeded, we need to update the config
3906
      self.feedback_fn("* instance running on secondary node (%s),"
3907
                       " updating config" % target_node)
3908
      instance.primary_node = target_node
3909
      self.cfg.Update(instance)
3910
      demoted_node = source_node
3911
    else:
3912
      self.feedback_fn("* instance confirmed to be running on its"
3913
                       " primary node (%s)" % source_node)
3914
      demoted_node = target_node
3915

    
3916
    self._EnsureSecondary(demoted_node)
3917
    try:
3918
      self._WaitUntilSync()
3919
    except errors.OpExecError:
3920
      # we ignore here errors, since if the device is standalone, it
3921
      # won't be able to sync
3922
      pass
3923
    self._GoStandalone()
3924
    self._GoReconnect(False)
3925
    self._WaitUntilSync()
3926

    
3927
    self.feedback_fn("* done")
3928

    
3929
  def _RevertDiskStatus(self):
3930
    """Try to revert the disk status after a failed migration.
3931

3932
    """
3933
    target_node = self.target_node
3934
    try:
3935
      self._EnsureSecondary(target_node)
3936
      self._GoStandalone()
3937
      self._GoReconnect(False)
3938
      self._WaitUntilSync()
3939
    except errors.OpExecError, err:
3940
      self.LogWarning("Migration failed and I can't reconnect the"
3941
                      " drives: error '%s'\n"
3942
                      "Please look and recover the instance status" %
3943
                      str(err))
3944

    
3945
  def _AbortMigration(self):
3946
    """Call the hypervisor code to abort a started migration.
3947

3948
    """
3949
    instance = self.instance
3950
    target_node = self.target_node
3951
    migration_info = self.migration_info
3952

    
3953
    abort_result = self.rpc.call_finalize_migration(target_node,
3954
                                                    instance,
3955
                                                    migration_info,
3956
                                                    False)
3957
    abort_msg = abort_result.RemoteFailMsg()
3958
    if abort_msg:
3959
      logging.error("Aborting migration failed on target node %s: %s" %
3960
                    (target_node, abort_msg))
3961
      # Don't raise an exception here, as we still have to try to revert the
3962
      # disk status, even if this step failed.
3963

    
3964
  def _ExecMigration(self):
3965
    """Migrate an instance.
3966

3967
    The migrate is done by:
3968
      - change the disks into dual-master mode
3969
      - wait until disks are fully synchronized again
3970
      - migrate the instance
3971
      - change disks on the new secondary node (the old primary) to secondary
3972
      - wait until disks are fully synchronized
3973
      - change disks into single-master mode
3974

3975
    """
3976
    instance = self.instance
3977
    target_node = self.target_node
3978
    source_node = self.source_node
3979

    
3980
    self.feedback_fn("* checking disk consistency between source and target")
3981
    for dev in instance.disks:
3982
      if not _CheckDiskConsistency(self, dev, target_node, False):
3983
        raise errors.OpExecError("Disk %s is degraded or not fully"
3984
                                 " synchronized on target node,"
3985
                                 " aborting migrate." % dev.iv_name)
3986

    
3987
    # First get the migration information from the remote node
3988
    result = self.rpc.call_migration_info(source_node, instance)
3989
    msg = result.RemoteFailMsg()
3990
    if msg:
3991
      log_err = ("Failed fetching source migration information from %s: %s" %
3992
                 (source_node, msg))
3993
      logging.error(log_err)
3994
      raise errors.OpExecError(log_err)
3995

    
3996
    self.migration_info = migration_info = result.payload
3997

    
3998
    # Then switch the disks to master/master mode
3999
    self._EnsureSecondary(target_node)
4000
    self._GoStandalone()
4001
    self._GoReconnect(True)
4002
    self._WaitUntilSync()
4003

    
4004
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4005
    result = self.rpc.call_accept_instance(target_node,
4006
                                           instance,
4007
                                           migration_info,
4008
                                           self.nodes_ip[target_node])
4009

    
4010
    msg = result.RemoteFailMsg()
4011
    if msg:
4012
      logging.error("Instance pre-migration failed, trying to revert"
4013
                    " disk status: %s", msg)
4014
      self._AbortMigration()
4015
      self._RevertDiskStatus()
4016
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4017
                               (instance.name, msg))
4018

    
4019
    self.feedback_fn("* migrating instance to %s" % target_node)
4020
4021
    result = self.rpc.call_instance_migrate(source_node, instance,
4022
                                            self.nodes_ip[target_node],
4023
                                            self.op.live)
4024
    msg = result.RemoteFailMsg()
4025
    if msg:
4026
      logging.error("Instance migration failed, trying to revert"
4027
                    " disk status: %s", msg)
4028
      self._AbortMigration()
4029
      self._RevertDiskStatus()
4030
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4031
                               (instance.name, msg))
4032
4033

    
4034
    instance.primary_node = target_node
4035
    # distribute new instance config to the other nodes
4036
    self.cfg.Update(instance)
4037

    
4038
    result = self.rpc.call_finalize_migration(target_node,
4039
                                              instance,
4040
                                              migration_info,
4041
                                              True)
4042
    msg = result.RemoteFailMsg()
4043
    if msg:
4044
      logging.error("Instance migration succeeded, but finalization failed:"
4045
                    " %s" % msg)
4046
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4047
                               msg)
4048

    
4049
    self._EnsureSecondary(source_node)
4050
    self._WaitUntilSync()
4051
    self._GoStandalone()
4052
    self._GoReconnect(False)
4053
    self._WaitUntilSync()
4054

    
4055
    self.feedback_fn("* done")
4056

    
4057
  def Exec(self, feedback_fn):
4058
    """Perform the migration.
4059

4060
    """
4061
    self.feedback_fn = feedback_fn
4062

    
4063
    self.source_node = self.instance.primary_node
4064
    self.target_node = self.instance.secondary_nodes[0]
4065
    self.all_nodes = [self.source_node, self.target_node]
4066
    self.nodes_ip = {
4067
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4068
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4069
      }
4070
    if self.op.cleanup:
4071
      return self._ExecCleanup()
4072
    else:
4073
      return self._ExecMigration()
4074

    
4075
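# Illustrative sketch, not part of the original code: assuming the usual
# opcodes.OpMigrateInstance counterpart of this LU, a live migration request
# carries the three parameters listed in _OP_REQP above; "cleanup" selects
# the recovery path implemented by _ExecCleanup instead of a new migration.
def _ExampleMigrateOpcode(instance_name):
  """Hypothetical helper building a migration opcode (opcode name assumed)."""
  return opcodes.OpMigrateInstance(instance_name=instance_name,
                                   live=True, cleanup=False)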

    
4076
def _CreateBlockDev(lu, node, instance, device, force_create,
4077
                    info, force_open):
4078
  """Create a tree of block devices on a given node.
4079

4080
  If this device type has to be created on secondaries, create it and
4081
  all its children.
4082

4083
  If not, just recurse to children keeping the same 'force' value.
4084

4085
  @param lu: the lu on whose behalf we execute
4086
  @param node: the node on which to create the device
4087
  @type instance: L{objects.Instance}
4088
  @param instance: the instance which owns the device
4089
  @type device: L{objects.Disk}
4090
  @param device: the device to create
4091
  @type force_create: boolean
4092
  @param force_create: whether to force creation of this device; this
4093
      will be changed to True whenever we find a device which has
4094
      CreateOnSecondary() attribute
4095
  @param info: the extra 'metadata' we should attach to the device
4096
      (this will be represented as a LVM tag)
4097
  @type force_open: boolean
4098
  @param force_open: this parameter will be passed to the
4099
      L{backend.BlockdevCreate} function where it specifies
4100
      whether we run on primary or not, and it affects both
4101
      the child assembly and the device's own Open() execution
4102

4103
  """
4104
  if device.CreateOnSecondary():
4105
    force_create = True
4106

    
4107
  if device.children:
4108
    for child in device.children:
4109
      _CreateBlockDev(lu, node, instance, child, force_create,
4110
                      info, force_open)
4111

    
4112
  if not force_create:
4113
    return
4114

    
4115
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
4116

    
4117
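# Illustrative sketch, not part of the original code: how _CreateDisks()
# below drives _CreateBlockDev() for every disk of an instance; force_create
# and force_open are only True on the primary node, and the helper recurses
# into any children (e.g. the DRBD data/meta LVs) before the device itself.
def _ExampleCreateInstanceDisks(lu, instance, info):
  """Hypothetical helper mirroring the per-node creation loop below."""
  for device in instance.disks:
    for node in instance.all_nodes:
      f_create = node == instance.primary_node
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)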

    
4118
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
4119
  """Create a single block device on a given node.
4120

4121
  This will not recurse over children of the device, so they must be
4122
  created in advance.
4123

4124
  @param lu: the lu on whose behalf we execute
4125
  @param node: the node on which to create the device
4126
  @type instance: L{objects.Instance}
4127
  @param instance: the instance which owns the device
4128
  @type device: L{objects.Disk}
4129
  @param device: the device to create
4130
  @param info: the extra 'metadata' we should attach to the device
4131
      (this will be represented as a LVM tag)
4132
  @type force_open: boolean
4133
  @param force_open: this parameter will be passed to the
4134
      L{backend.BlockdevCreate} function where it specifies
4135
      whether we run on primary or not, and it affects both
4136
      the child assembly and the device's own Open() execution
4137

4138
  """
4139
  lu.cfg.SetDiskID(device, node)
4140
  result = lu.rpc.call_blockdev_create(node, device, device.size,
4141
                                       instance.name, force_open, info)
4142
  msg = result.RemoteFailMsg()
4143
  if msg:
4144
    raise errors.OpExecError("Can't create block device %s on"
4145
                             " node %s for instance %s: %s" %
4146
                             (device, node, instance.name, msg))
4147
  if device.physical_id is None:
4148
    device.physical_id = result.payload
4149

    
4150

    
4151
def _GenerateUniqueNames(lu, exts):
4152
  """Generate a suitable LV name.
4153

4154
  This will generate logical volume names for the given instance.
4155

4156
  """
4157
  results = []
4158
  for val in exts:
4159
    new_id = lu.cfg.GenerateUniqueID()
4160
    results.append("%s%s" % (new_id, val))
4161
  return results
4162

    
4163

    
4164
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
4165
                         p_minor, s_minor):
4166
  """Generate a drbd8 device complete with its children.
4167

4168
  """
4169
  port = lu.cfg.AllocatePort()
4170
  vgname = lu.cfg.GetVGName()
4171
  shared_secret = lu.cfg.GenerateDRBDSecret()
4172
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
4173
                          logical_id=(vgname, names[0]))
4174
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
4175
                          logical_id=(vgname, names[1]))
4176
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
4177
                          logical_id=(primary, secondary, port,
4178
                                      p_minor, s_minor,
4179
                                      shared_secret),
4180
                          children=[dev_data, dev_meta],
4181
                          iv_name=iv_name)
4182
  return drbd_dev
4183

    
4184
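# Illustrative sketch, not part of the original code: one DRBD8 disk as
# produced by _GenerateDRBD8Branch() is an LD_DRBD8 objects.Disk whose two
# children are the data LV and a 128 MiB metadata LV; the instance name
# below is made up for the example.
def _ExampleDrbdDisk(lu, primary_node, secondary_node, size_mib):
  """Hypothetical helper generating a single DRBD8 disk tree."""
  names = _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
  minors = lu.cfg.AllocateDRBDMinor([primary_node, secondary_node],
                                    "instance1.example.com")
  return _GenerateDRBD8Branch(lu, primary_node, secondary_node, size_mib,
                              names, "disk/0", minors[0], minors[1])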

    
4185
def _GenerateDiskTemplate(lu, template_name,
4186
                          instance_name, primary_node,
4187
                          secondary_nodes, disk_info,
4188
                          file_storage_dir, file_driver,
4189
                          base_index):
4190
  """Generate the entire disk layout for a given template type.
4191

4192
  """
4193
  #TODO: compute space requirements
4194

    
4195
  vgname = lu.cfg.GetVGName()
4196
  disk_count = len(disk_info)
4197
  disks = []
4198
  if template_name == constants.DT_DISKLESS:
4199
    pass
4200
  elif template_name == constants.DT_PLAIN:
4201
    if len(secondary_nodes) != 0:
4202
      raise errors.ProgrammerError("Wrong template configuration")
4203

    
4204
    names = _GenerateUniqueNames(lu, [".disk%d" % i
4205
                                      for i in range(disk_count)])
4206
    for idx, disk in enumerate(disk_info):
4207
      disk_index = idx + base_index
4208
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
4209
                              logical_id=(vgname, names[idx]),
4210
                              iv_name="disk/%d" % disk_index,
4211
                              mode=disk["mode"])
4212
      disks.append(disk_dev)
4213
  elif template_name == constants.DT_DRBD8:
4214
    if len(secondary_nodes) != 1:
4215
      raise errors.ProgrammerError("Wrong template configuration")
4216
    remote_node = secondary_nodes[0]
4217
    minors = lu.cfg.AllocateDRBDMinor(
4218
      [primary_node, remote_node] * len(disk_info), instance_name)
4219

    
4220
    names = []
4221
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
4222
                                               for i in range(disk_count)]):
4223
      names.append(lv_prefix + "_data")
4224
      names.append(lv_prefix + "_meta")
4225
    for idx, disk in enumerate(disk_info):
4226
      disk_index = idx + base_index
4227
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
4228
                                      disk["size"], names[idx*2:idx*2+2],
4229
                                      "disk/%d" % disk_index,
4230
                                      minors[idx*2], minors[idx*2+1])
4231
      disk_dev.mode = disk["mode"]
4232
      disks.append(disk_dev)
4233
  elif template_name == constants.DT_FILE:
4234
    if len(secondary_nodes) != 0:
4235
      raise errors.ProgrammerError("Wrong template configuration")
4236

    
4237
    for idx, disk in enumerate(disk_info):
4238
      disk_index = idx + base_index
4239
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
4240
                              iv_name="disk/%d" % disk_index,
4241
                              logical_id=(file_driver,
4242
                                          "%s/disk%d" % (file_storage_dir,
4243
                                                         disk_index)),
4244
                              mode=disk["mode"])
4245
      disks.append(disk_dev)
4246
  else:
4247
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
4248
  return disks
4249

    
4250
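# Illustrative sketch, not part of the original code: disk_info is a list of
# {"size", "mode"} dicts with sizes in MiB; for DT_PLAIN there are no
# secondary nodes and the file storage arguments are unused, so they can be
# passed as None (the instance and node names below are made up).
def _ExamplePlainDiskLayout(lu):
  """Hypothetical helper generating a two-disk plain (LVM) layout."""
  disk_info = [{"size": 10240, "mode": "rw"},
               {"size": 2048, "mode": "rw"}]
  return _GenerateDiskTemplate(lu, constants.DT_PLAIN,
                               "instance1.example.com", "node1.example.com",
                               [], disk_info, None, None, 0)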

    
4251
def _GetInstanceInfoText(instance):
4252
  """Compute that text that should be added to the disk's metadata.
4253

4254
  """
4255
  return "originstname+%s" % instance.name
4256

    
4257

    
4258
def _CreateDisks(lu, instance):
4259
  """Create all disks for an instance.
4260

4261
  This abstracts away some work from AddInstance.
4262

4263
  @type lu: L{LogicalUnit}
4264
  @param lu: the logical unit on whose behalf we execute
4265
  @type instance: L{objects.Instance}
4266
  @param instance: the instance whose disks we should create
4267
  @rtype: boolean
4268
  @return: the success of the creation
4269

4270
  """
4271
  info = _GetInstanceInfoText(instance)
4272
  pnode = instance.primary_node
4273

    
4274
  if instance.disk_template == constants.DT_FILE:
4275
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4276
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
4277

    
4278
    if result.failed or not result.data:
4279
      raise errors.OpExecError("Could not connect to node '%s'" % pnode)
4280

    
4281
    if not result.data[0]:
4282
      raise errors.OpExecError("Failed to create directory '%s'" %
4283
                               file_storage_dir)
4284

    
4285
  # Note: this needs to be kept in sync with adding of disks in
4286
  # LUSetInstanceParams
4287
  for device in instance.disks:
4288
    logging.info("Creating volume %s for instance %s",
4289
                 device.iv_name, instance.name)
4290
    #HARDCODE
4291
    for node in instance.all_nodes:
4292
      f_create = node == pnode
4293
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
4294

    
4295

    
4296
def _RemoveDisks(lu, instance):
4297
  """Remove all disks for an instance.
4298

4299
  This abstracts away some work from `AddInstance()` and
4300
  `RemoveInstance()`. Note that in case some of the devices couldn't
4301
  be removed, the removal will continue with the other ones (compare
4302
  with `_CreateDisks()`).
4303

4304
  @type lu: L{LogicalUnit}
4305
  @param lu: the logical unit on whose behalf we execute
4306
  @type instance: L{objects.Instance}
4307
  @param instance: the instance whose disks we should remove
4308
  @rtype: boolean
4309
  @return: the success of the removal
4310

4311
  """
4312
  logging.info("Removing block devices for instance %s", instance.name)
4313

    
4314
  all_result = True
4315
  for device in instance.disks:
4316
    for node, disk in device.ComputeNodeTree(instance.primary_node):
4317
      lu.cfg.SetDiskID(disk, node)
4318
      msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
4319
      if msg:
4320
        lu.LogWarning("Could not remove block device %s on node %s,"
4321
                      " continuing anyway: %s", device.iv_name, node, msg)
4322
        all_result = False
4323

    
4324
  if instance.disk_template == constants.DT_FILE:
4325
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4326
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
4327
                                                 file_storage_dir)
4328
    if result.failed or not result.data:
4329
      logging.error("Could not remove directory '%s'", file_storage_dir)
4330
      all_result = False
4331

    
4332
  return all_result
4333

    
4334

    
4335
def _ComputeDiskSize(disk_template, disks):
4336
  """Compute disk size requirements in the volume group
4337

4338
  """
4339
  # Required free disk space as a function of the requested disk sizes
4340
  req_size_dict = {
4341
    constants.DT_DISKLESS: None,
4342
    constants.DT_PLAIN: sum(d["size"] for d in disks),
4343
    # 128 MB are added for drbd metadata for each disk
4344
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
4345
    constants.DT_FILE: None,
4346
  }
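  # Worked example (sizes are illustrative): two 1024 MB disks under
  # DT_DRBD8 need (1024 + 128) + (1024 + 128) = 2304 MB of free space in
  # the volume group, while DT_FILE and DT_DISKLESS need none at all.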
4347

    
4348
  if disk_template not in req_size_dict:
4349
    raise errors.ProgrammerError("Disk template '%s' size requirement"
4350
                                 " is unknown" %  disk_template)
4351

    
4352
  return req_size_dict[disk_template]
4353

    
4354

    
4355
def _CheckHVParams(lu, nodenames, hvname, hvparams):
4356
  """Hypervisor parameter validation.
4357

4358
  This function abstracts the hypervisor parameter validation to be
4359
  used in both instance create and instance modify.
4360

4361
  @type lu: L{LogicalUnit}
4362
  @param lu: the logical unit for which we check
4363
  @type nodenames: list
4364
  @param nodenames: the list of nodes on which we should check
4365
  @type hvname: string
4366
  @param hvname: the name of the hypervisor we should use
4367
  @type hvparams: dict
4368
  @param hvparams: the parameters which we need to check
4369
  @raise errors.OpPrereqError: if the parameters are not valid
4370

4371
  """
4372
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
4373
                                                  hvname,
4374
                                                  hvparams)
4375
  for node in nodenames:
4376
    info = hvinfo[node]
4377
    if info.offline:
4378
      continue
4379
    msg = info.RemoteFailMsg()
4380
    if msg:
4381
      raise errors.OpPrereqError("Hypervisor parameter validation"
4382
                                 " failed on node %s: %s" % (node, msg))
4383

    
4384

    
4385
class LUCreateInstance(LogicalUnit):
4386
  """Create an instance.
4387

4388
  """
4389
  HPATH = "instance-add"
4390
  HTYPE = constants.HTYPE_INSTANCE
4391
  _OP_REQP = ["instance_name", "disks", "disk_template",
4392
              "mode", "start",
4393
              "wait_for_sync", "ip_check", "nics",
4394
              "hvparams", "beparams"]
4395
  REQ_BGL = False
4396

    
4397
  def _ExpandNode(self, node):
4398
    """Expands and checks one node name.
4399

4400
    """
4401
    node_full = self.cfg.ExpandNodeName(node)
4402
    if node_full is None:
4403
      raise errors.OpPrereqError("Unknown node %s" % node)
4404
    return node_full
4405

    
4406
  def ExpandNames(self):
4407
    """ExpandNames for CreateInstance.
4408

4409
    Figure out the right locks for instance creation.
4410

4411
    """
4412
    self.needed_locks = {}
4413

    
4414
    # set optional parameters to none if they don't exist
4415
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4416
      if not hasattr(self.op, attr):
4417
        setattr(self.op, attr, None)
4418

    
4419
    # cheap checks, mostly valid constants given
4420

    
4421
    # verify creation mode
4422
    if self.op.mode not in (constants.INSTANCE_CREATE,
4423
                            constants.INSTANCE_IMPORT):
4424
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4425
                                 self.op.mode)
4426

    
4427
    # disk template and mirror node verification
4428
    if self.op.disk_template not in constants.DISK_TEMPLATES:
4429
      raise errors.OpPrereqError("Invalid disk template name")
4430

    
4431
    if self.op.hypervisor is None:
4432
      self.op.hypervisor = self.cfg.GetHypervisorType()
4433

    
4434
    cluster = self.cfg.GetClusterInfo()
4435
    enabled_hvs = cluster.enabled_hypervisors
4436
    if self.op.hypervisor not in enabled_hvs:
4437
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4438
                                 " cluster (%s)" % (self.op.hypervisor,
4439
                                  ",".join(enabled_hvs)))
4440

    
4441
    # check hypervisor parameter syntax (locally)
4442
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4443
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
4444
                                  self.op.hvparams)
4445
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4446
    hv_type.CheckParameterSyntax(filled_hvp)
4447

    
4448
    # fill and remember the beparams dict
4449
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4450
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
4451
                                    self.op.beparams)
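    # Illustrative example of the fill semantics: with cluster defaults of
    # {BE_MEMORY: 128, BE_VCPUS: 1} (values assumed) and an opcode passing
    # only {BE_MEMORY: 512}, be_full becomes {BE_MEMORY: 512, BE_VCPUS: 1}.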
4452

    
4453
    #### instance parameters check
4454

    
4455
    # instance name verification
4456
    hostname1 = utils.HostInfo(self.op.instance_name)
4457
    self.op.instance_name = instance_name = hostname1.name
4458

    
4459
    # this is just a preventive check, but someone might still add this
4460
    # instance in the meantime, and creation will fail at lock-add time
4461
    if instance_name in self.cfg.GetInstanceList():
4462
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4463
                                 instance_name)
4464

    
4465
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4466

    
4467
    # NIC buildup
4468
    self.nics = []
4469
    for idx, nic in enumerate(self.op.nics):
4470
      nic_mode_req = nic.get("mode", None)
4471
      nic_mode = nic_mode_req
4472
      if nic_mode is None:
4473
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
4474

    
4475
      # in routed mode, for the first nic, the default ip is 'auto'
4476
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
4477
        default_ip_mode = constants.VALUE_AUTO
4478
      else:
4479
        default_ip_mode = constants.VALUE_NONE
4480

    
4481
      # ip validity checks
4482
      ip = nic.get("ip", default_ip_mode)
4483
      if ip is None or ip.lower() == constants.VALUE_NONE:
4484
        nic_ip = None
4485
      elif ip.lower() == constants.VALUE_AUTO:
4486
        nic_ip = hostname1.ip
4487
      else:
4488
        if not utils.IsValidIP(ip):
4489
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4490
                                     " like a valid IP" % ip)
4491
        nic_ip = ip
4492

    
4493
      # TODO: check the ip for uniqueness !!
4494
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
4495
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
4496

    
4497
      # MAC address verification
4498
      mac = nic.get("mac", constants.VALUE_AUTO)
4499
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4500
        if not utils.IsValidMac(mac.lower()):
4501
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4502
                                     mac)
4503
      # bridge verification
4504
      bridge = nic.get("bridge", None)
4505
      link = nic.get("link", None)
4506
      if bridge and link:
4507
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link' at the same time")
4508
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
4509
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
4510
      elif bridge:
4511
        link = bridge
4512

    
4513
      nicparams = {}
4514
      if nic_mode_req:
4515
        nicparams[constants.NIC_MODE] = nic_mode_req
4516
      if link:
4517
        nicparams[constants.NIC_LINK] = link
4518

    
4519
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4520
                                      nicparams)
4521
      objects.NIC.CheckParameterSyntax(check_params)
4522
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
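      # Illustrative example: a legacy NIC spec such as
      # {"bridge": "xen-br0"} (bridge name assumed) is translated above into
      # nicparams with NIC_LINK set to "xen-br0", while the mode falls back
      # to the cluster-wide default.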
4523

    
4524
    # disk checks/pre-build
4525
    self.disks = []
4526
    for disk in self.op.disks:
4527
      mode = disk.get("mode", constants.DISK_RDWR)
4528
      if mode not in constants.DISK_ACCESS_SET:
4529
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4530
                                   mode)
4531
      size = disk.get("size", None)
4532
      if size is None:
4533
        raise errors.OpPrereqError("Missing disk size")
4534
      try:
4535
        size = int(size)
4536
      except ValueError:
4537
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4538
      self.disks.append({"size": size, "mode": mode})
4539

    
4540
    # used in CheckPrereq for ip ping check
4541
    self.check_ip = hostname1.ip
4542

    
4543
    # file storage checks
4544
    if (self.op.file_driver and
4545
        not self.op.file_driver in constants.FILE_DRIVER):
4546
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
4547
                                 self.op.file_driver)
4548

    
4549
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4550
      raise errors.OpPrereqError("File storage directory path not absolute")
4551

    
4552
    ### Node/iallocator related checks
4553
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
4554
      raise errors.OpPrereqError("One and only one of iallocator and primary"
4555
                                 " node must be given")
4556

    
4557
    if self.op.iallocator:
4558
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4559
    else:
4560
      self.op.pnode = self._ExpandNode(self.op.pnode)
4561
      nodelist = [self.op.pnode]
4562
      if self.op.snode is not None:
4563
        self.op.snode = self._ExpandNode(self.op.snode)
4564
        nodelist.append(self.op.snode)
4565
      self.needed_locks[locking.LEVEL_NODE] = nodelist
4566

    
4567
    # in case of import lock the source node too
4568
    if self.op.mode == constants.INSTANCE_IMPORT:
4569
      src_node = getattr(self.op, "src_node", None)
4570
      src_path = getattr(self.op, "src_path", None)
4571

    
4572
      if src_path is None:
4573
        self.op.src_path = src_path = self.op.instance_name
4574

    
4575
      if src_node is None:
4576
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4577
        self.op.src_node = None
4578
        if os.path.isabs(src_path):
4579
          raise errors.OpPrereqError("Importing an instance from an absolute"
4580
                                     " path requires a source node option.")
4581
      else:
4582
        self.op.src_node = src_node = self._ExpandNode(src_node)
4583
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4584
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
4585
        if not os.path.isabs(src_path):
4586
          self.op.src_path = src_path = \
4587
            os.path.join(constants.EXPORT_DIR, src_path)
4588

    
4589
    else: # INSTANCE_CREATE
4590
      if getattr(self.op, "os_type", None) is None:
4591
        raise errors.OpPrereqError("No guest OS specified")
4592

    
4593
  def _RunAllocator(self):
4594
    """Run the allocator based on input opcode.
4595

4596
    """
4597
    nics = [n.ToDict() for n in self.nics]
4598
    ial = IAllocator(self,
4599
                     mode=constants.IALLOCATOR_MODE_ALLOC,
4600
                     name=self.op.instance_name,
4601
                     disk_template=self.op.disk_template,
4602
                     tags=[],
4603
                     os=self.op.os_type,
4604
                     vcpus=self.be_full[constants.BE_VCPUS],
4605
                     mem_size=self.be_full[constants.BE_MEMORY],
4606
                     disks=self.disks,
4607
                     nics=nics,
4608
                     hypervisor=self.op.hypervisor,
4609
                     )
4610

    
4611
    ial.Run(self.op.iallocator)
4612

    
4613
    if not ial.success:
4614
      raise errors.OpPrereqError("Can't compute nodes using"
4615
                                 " iallocator '%s': %s" % (self.op.iallocator,
4616
                                                           ial.info))
4617
    if len(ial.nodes) != ial.required_nodes:
4618
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4619
                                 " of nodes (%s), required %s" %
4620
                                 (self.op.iallocator, len(ial.nodes),
4621
                                  ial.required_nodes))
4622
    self.op.pnode = ial.nodes[0]
4623
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4624
                 self.op.instance_name, self.op.iallocator,
4625
                 ", ".join(ial.nodes))
4626
    if ial.required_nodes == 2:
4627
      self.op.snode = ial.nodes[1]
4628

    
4629
  def BuildHooksEnv(self):
4630
    """Build hooks env.
4631

4632
    This runs on master, primary and secondary nodes of the instance.
4633

4634
    """
4635
    env = {
4636
      "ADD_MODE": self.op.mode,
4637
      }
4638
    if self.op.mode == constants.INSTANCE_IMPORT:
4639
      env["SRC_NODE"] = self.op.src_node
4640
      env["SRC_PATH"] = self.op.src_path
4641
      env["SRC_IMAGES"] = self.src_images
4642

    
4643
    env.update(_BuildInstanceHookEnv(
4644
      name=self.op.instance_name,
4645
      primary_node=self.op.pnode,
4646
      secondary_nodes=self.secondaries,
4647
      status=self.op.start,
4648
      os_type=self.op.os_type,
4649
      memory=self.be_full[constants.BE_MEMORY],
4650
      vcpus=self.be_full[constants.BE_VCPUS],
4651
      nics=_PreBuildNICHooksList(self, self.nics),
4652
      disk_template=self.op.disk_template,
4653
      disks=[(d["size"], d["mode"]) for d in self.disks],
4654
    ))
4655

    
4656
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
4657
          self.secondaries)
4658
    return env, nl, nl
4659

    
4660

    
4661
  def CheckPrereq(self):
4662
    """Check prerequisites.
4663

4664
    """
4665
    if (not self.cfg.GetVGName() and
4666
        self.op.disk_template not in constants.DTS_NOT_LVM):
4667
      raise errors.OpPrereqError("Cluster does not support lvm-based"
4668
                                 " instances")
4669

    
4670
    if self.op.mode == constants.INSTANCE_IMPORT:
4671
      src_node = self.op.src_node
4672
      src_path = self.op.src_path
4673

    
4674
      if src_node is None:
4675
        exp_list = self.rpc.call_export_list(
4676
          self.acquired_locks[locking.LEVEL_NODE])
4677
        found = False
4678
        for node in exp_list:
4679
          if not exp_list[node].failed and src_path in exp_list[node].data:
4680
            found = True
4681
            self.op.src_node = src_node = node
4682
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
4683
                                                       src_path)
4684
            break
4685
        if not found:
4686
          raise errors.OpPrereqError("No export found for relative path %s" %
4687
                                      src_path)
4688

    
4689
      _CheckNodeOnline(self, src_node)
4690
      result = self.rpc.call_export_info(src_node, src_path)
4691
      msg = result.RemoteFailMsg()
4692
      if msg:
4693
        raise errors.OpPrereqError("No export or invalid export found in"
4694
                                   " dir %s: %s" % (src_path, msg))
4695

    
4696
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
4697
      if not export_info.has_section(constants.INISECT_EXP):
4698
        raise errors.ProgrammerError("Corrupted export config")
4699

    
4700
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
4701
      if (int(ei_version) != constants.EXPORT_VERSION):
4702
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
4703
                                   (ei_version, constants.EXPORT_VERSION))
4704

    
4705
      # Check that the new instance doesn't have less disks than the export
4706
      instance_disks = len(self.disks)
4707
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
4708
      if instance_disks < export_disks:
4709
        raise errors.OpPrereqError("Not enough disks to import."
4710
                                   " (instance: %d, export: %d)" %
4711
                                   (instance_disks, export_disks))
4712

    
4713
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
4714
      disk_images = []
4715
      for idx in range(export_disks):
4716
        option = 'disk%d_dump' % idx
4717
        if export_info.has_option(constants.INISECT_INS, option):
4718
          # FIXME: are the old os-es, disk sizes, etc. useful?
4719
          export_name = export_info.get(constants.INISECT_INS, option)
4720
          image = os.path.join(src_path, export_name)
4721
          disk_images.append(image)
4722
        else:
4723
          disk_images.append(False)
4724

    
4725
      self.src_images = disk_images
4726

    
4727
      old_name = export_info.get(constants.INISECT_INS, 'name')
4728
      # FIXME: int() here could throw a ValueError on broken exports
4729
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
4730
      if self.op.instance_name == old_name:
4731
        for idx, nic in enumerate(self.nics):
4732
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
4733
            nic_mac_ini = 'nic%d_mac' % idx
4734
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
4735

    
4736
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
4737
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
4738
    if self.op.start and not self.op.ip_check:
4739
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
4740
                                 " adding an instance in start mode")
4741

    
4742
    if self.op.ip_check:
4743
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
4744
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4745
                                   (self.check_ip, self.op.instance_name))
4746

    
4747
    #### mac address generation
4748
    # By generating here the mac address both the allocator and the hooks get
4749
    # the real final mac address rather than the 'auto' or 'generate' value.
4750
    # There is a race condition between the generation and the instance object
4751
    # creation, which means that we know the mac is valid now, but we're not
4752
    # sure it will be when we actually add the instance. If things go bad
4753
    # adding the instance will abort because of a duplicate mac, and the
4754
    # creation job will fail.
4755
    for nic in self.nics:
4756
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4757
        nic.mac = self.cfg.GenerateMAC()
4758

    
4759
    #### allocator run
4760

    
4761
    if self.op.iallocator is not None:
4762
      self._RunAllocator()
4763

    
4764
    #### node related checks
4765

    
4766
    # check primary node
4767
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
4768
    assert self.pnode is not None, \
4769
      "Cannot retrieve locked node %s" % self.op.pnode
4770
    if pnode.offline:
4771
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
4772
                                 pnode.name)
4773
    if pnode.drained:
4774
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
4775
                                 pnode.name)
4776

    
4777
    self.secondaries = []
4778

    
4779
    # mirror node verification
4780
    if self.op.disk_template in constants.DTS_NET_MIRROR:
4781
      if self.op.snode is None:
4782
        raise errors.OpPrereqError("The networked disk templates need"
4783
                                   " a mirror node")
4784
      if self.op.snode == pnode.name:
4785
        raise errors.OpPrereqError("The secondary node cannot be"
4786
                                   " the primary node.")
4787
      _CheckNodeOnline(self, self.op.snode)
4788
      _CheckNodeNotDrained(self, self.op.snode)
4789
      self.secondaries.append(self.op.snode)
4790

    
4791
    nodenames = [pnode.name] + self.secondaries
4792

    
4793
    req_size = _ComputeDiskSize(self.op.disk_template,
4794
                                self.disks)
4795

    
4796
    # Check lv size requirements
4797
    if req_size is not None:
4798
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
4799
                                         self.op.hypervisor)
4800
      for node in nodenames:
4801
        info = nodeinfo[node]
4802
        info.Raise()
4803
        info = info.data
4804
        if not info:
4805
          raise errors.OpPrereqError("Cannot get current information"
4806
                                     " from node '%s'" % node)
4807
        vg_free = info.get('vg_free', None)
4808
        if not isinstance(vg_free, int):
4809
          raise errors.OpPrereqError("Can't compute free disk space on"
4810
                                     " node %s" % node)
4811
        if req_size > info['vg_free']:
4812
          raise errors.OpPrereqError("Not enough disk space on target node %s."
4813
                                     " %d MB available, %d MB required" %
4814
                                     (node, info['vg_free'], req_size))
4815

    
4816
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
4817

    
4818
    # os verification
4819
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
4820
    result.Raise()
4821
    if not isinstance(result.data, objects.OS):
4822
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
4823
                                 " primary node"  % self.op.os_type)
4824

    
4825
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
4826

    
4827
    # memory check on primary node
4828
    if self.op.start:
4829
      _CheckNodeFreeMemory(self, self.pnode.name,
4830
                           "creating instance %s" % self.op.instance_name,
4831
                           self.be_full[constants.BE_MEMORY],
4832
                           self.op.hypervisor)
4833

    
4834
  def Exec(self, feedback_fn):
4835
    """Create and add the instance to the cluster.
4836

4837
    """
4838
    instance = self.op.instance_name
4839
    pnode_name = self.pnode.name
4840

    
4841
    ht_kind = self.op.hypervisor
4842
    if ht_kind in constants.HTS_REQ_PORT:
4843
      network_port = self.cfg.AllocatePort()
4844
    else:
4845
      network_port = None
4846

    
4847
    ##if self.op.vnc_bind_address is None:
4848
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
4849

    
4850
    # this is needed because os.path.join does not accept None arguments
4851
    if self.op.file_storage_dir is None:
4852
      string_file_storage_dir = ""
4853
    else:
4854
      string_file_storage_dir = self.op.file_storage_dir
4855

    
4856
    # build the full file storage dir path
4857
    file_storage_dir = os.path.normpath(os.path.join(
4858
                                        self.cfg.GetFileStorageDir(),
4859
                                        string_file_storage_dir, instance))
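    # Illustrative example: with a cluster file storage dir of
    # "/srv/ganeti/file-storage" (path assumed) and no file_storage_dir in
    # the opcode, instance "instance1.example.com" is placed under
    # "/srv/ganeti/file-storage/instance1.example.com".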
4860

    
4861

    
4862
    disks = _GenerateDiskTemplate(self,
4863
                                  self.op.disk_template,
4864
                                  instance, pnode_name,
4865
                                  self.secondaries,
4866
                                  self.disks,
4867
                                  file_storage_dir,
4868
                                  self.op.file_driver,
4869
                                  0)
4870

    
4871
    iobj = objects.Instance(name=instance, os=self.op.os_type,
4872
                            primary_node=pnode_name,
4873
                            nics=self.nics, disks=disks,
4874
                            disk_template=self.op.disk_template,
4875
                            admin_up=False,
4876
                            network_port=network_port,
4877
                            beparams=self.op.beparams,
4878
                            hvparams=self.op.hvparams,
4879
                            hypervisor=self.op.hypervisor,
4880
                            )
4881

    
4882
    feedback_fn("* creating instance disks...")
4883
    try:
4884
      _CreateDisks(self, iobj)
4885
    except errors.OpExecError:
4886
      self.LogWarning("Device creation failed, reverting...")
4887
      try:
4888
        _RemoveDisks(self, iobj)
4889
      finally:
4890
        self.cfg.ReleaseDRBDMinors(instance)
4891
        raise
4892

    
4893
    feedback_fn("adding instance %s to cluster config" % instance)
4894

    
4895
    self.cfg.AddInstance(iobj)
4896
    # Declare that we don't want to remove the instance lock anymore, as we've
4897
    # added the instance to the config
4898
    del self.remove_locks[locking.LEVEL_INSTANCE]
4899
    # Unlock all the nodes
4900
    if self.op.mode == constants.INSTANCE_IMPORT:
4901
      nodes_keep = [self.op.src_node]
4902
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
4903
                       if node != self.op.src_node]
4904
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
4905
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
4906
    else:
4907
      self.context.glm.release(locking.LEVEL_NODE)
4908
      del self.acquired_locks[locking.LEVEL_NODE]
4909

    
4910
    if self.op.wait_for_sync:
4911
      disk_abort = not _WaitForSync(self, iobj)
4912
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
4913
      # make sure the disks are not degraded (still sync-ing is ok)
4914
      time.sleep(15)
4915
      feedback_fn("* checking mirrors status")
4916
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
4917
    else:
4918
      disk_abort = False
4919

    
4920
    if disk_abort:
4921
      _RemoveDisks(self, iobj)
4922
      self.cfg.RemoveInstance(iobj.name)
4923
      # Make sure the instance lock gets removed
4924
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
4925
      raise errors.OpExecError("There are some degraded disks for"
4926
                               " this instance")
4927

    
4928
    feedback_fn("creating os for instance %s on node %s" %
4929
                (instance, pnode_name))
4930

    
4931
    if iobj.disk_template != constants.DT_DISKLESS:
4932
      if self.op.mode == constants.INSTANCE_CREATE:
4933
        feedback_fn("* running the instance OS create scripts...")
4934
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
4935
        msg = result.RemoteFailMsg()
4936
        if msg:
4937
          raise errors.OpExecError("Could not add os for instance %s"
4938
                                   " on node %s: %s" %
4939
                                   (instance, pnode_name, msg))
4940

    
4941
      elif self.op.mode == constants.INSTANCE_IMPORT:
4942
        feedback_fn("* running the instance OS import scripts...")
4943
        src_node = self.op.src_node
4944
        src_images = self.src_images
4945
        cluster_name = self.cfg.GetClusterName()
4946
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
4947
                                                         src_node, src_images,
4948
                                                         cluster_name)
4949
        import_result.Raise()
4950
        for idx, result in enumerate(import_result.data):
4951
          if not result:
4952
            self.LogWarning("Could not import the image %s for instance"
4953
                            " %s, disk %d, on node %s" %
4954
                            (src_images[idx], instance, idx, pnode_name))
4955
      else:
4956
        # also checked in the prereq part
4957
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
4958
                                     % self.op.mode)
4959

    
4960
    if self.op.start:
4961
      iobj.admin_up = True
4962
      self.cfg.Update(iobj)
4963
      logging.info("Starting instance %s on node %s", instance, pnode_name)
4964
      feedback_fn("* starting instance...")
4965
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
4966
      msg = result.RemoteFailMsg()
4967
      if msg:
4968
        raise errors.OpExecError("Could not start instance: %s" % msg)
4969

    
4970

    
4971
class LUConnectConsole(NoHooksLU):
4972
  """Connect to an instance's console.
4973

4974
  This is somewhat special in that it returns the command line that
4975
  you need to run on the master node in order to connect to the
4976
  console.
4977

4978
  """
4979
  _OP_REQP = ["instance_name"]
4980
  REQ_BGL = False
4981

    
4982
  def ExpandNames(self):
4983
    self._ExpandAndLockInstance()
4984

    
4985
  def CheckPrereq(self):
4986
    """Check prerequisites.
4987

4988
    This checks that the instance is in the cluster.
4989

4990
    """
4991
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4992
    assert self.instance is not None, \
4993
      "Cannot retrieve locked instance %s" % self.op.instance_name
4994
    _CheckNodeOnline(self, self.instance.primary_node)
4995

    
4996
  def Exec(self, feedback_fn):
4997
    """Connect to the console of an instance
4998

4999
    """
5000
    instance = self.instance
5001
    node = instance.primary_node
5002

    
5003
    node_insts = self.rpc.call_instance_list([node],
5004
                                             [instance.hypervisor])[node]
5005
    node_insts.Raise()
5006

    
5007
    if instance.name not in node_insts.data:
5008
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5009

    
5010
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5011

    
5012
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5013
    cluster = self.cfg.GetClusterInfo()
5014
    # beparams and hvparams are passed separately, to avoid editing the
5015
    # instance and then saving the defaults in the instance itself.
5016
    hvparams = cluster.FillHV(instance)
5017
    beparams = cluster.FillBE(instance)
5018
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5019

    
5020
    # build ssh cmdline
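    # Note that nothing is executed here: the return value is the complete
    # ssh command line (connecting as root to the primary node, with a tty)
    # that the caller is expected to run on the master node itself.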
5021
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
5022

    
5023

    
5024
class LUReplaceDisks(LogicalUnit):
5025
  """Replace the disks of an instance.
5026

5027
  """
5028
  HPATH = "mirrors-replace"
5029
  HTYPE = constants.HTYPE_INSTANCE
5030
  _OP_REQP = ["instance_name", "mode", "disks"]
5031
  REQ_BGL = False
5032

    
5033
  def CheckArguments(self):
5034
    if not hasattr(self.op, "remote_node"):
5035
      self.op.remote_node = None
5036
    if not hasattr(self.op, "iallocator"):
5037
      self.op.iallocator = None
5038

    
5039
    # check for valid parameter combination
5040
    cnt = [self.op.remote_node, self.op.iallocator].count(None)
5041
    if self.op.mode == constants.REPLACE_DISK_CHG:
5042
      if cnt == 2:
5043
        raise errors.OpPrereqError("When changing the secondary either an"
5044
                                   " iallocator script must be used or the"
5045
                                   " new node given")
5046
      elif cnt == 0:
5047
        raise errors.OpPrereqError("Give either the iallocator or the new"
5048
                                   " secondary, not both")
5049
    else: # not replacing the secondary
5050
      if cnt != 2:
5051
        raise errors.OpPrereqError("The iallocator and new node options can"
5052
                                   " be used only when changing the"
5053
                                   " secondary node")
5054

    
5055
  def ExpandNames(self):
5056
    self._ExpandAndLockInstance()
5057

    
5058
    if self.op.iallocator is not None:
5059
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5060
    elif self.op.remote_node is not None:
5061
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5062
      if remote_node is None:
5063
        raise errors.OpPrereqError("Node '%s' not known" %
5064
                                   self.op.remote_node)
5065
      self.op.remote_node = remote_node
5066
      # Warning: do not remove the locking of the new secondary here
5067
      # unless DRBD8.AddChildren is changed to work in parallel;
5068
      # currently it doesn't since parallel invocations of
5069
      # FindUnusedMinor will conflict
5070
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5071
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5072
    else:
5073
      self.needed_locks[locking.LEVEL_NODE] = []
5074
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5075

    
5076
  def DeclareLocks(self, level):
5077
    # If we're not already locking all nodes in the set we have to declare the
5078
    # instance's primary/secondary nodes.
5079
    if (level == locking.LEVEL_NODE and
5080
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5081
      self._LockInstancesNodes()
5082

    
5083
  def _RunAllocator(self):
5084
    """Compute a new secondary node using an IAllocator.
5085

5086
    """
5087
    ial = IAllocator(self,
5088
                     mode=constants.IALLOCATOR_MODE_RELOC,
5089
                     name=self.op.instance_name,
5090
                     relocate_from=[self.sec_node])
5091

    
5092
    ial.Run(self.op.iallocator)
5093

    
5094
    if not ial.success:
5095
      raise errors.OpPrereqError("Can't compute nodes using"
5096
                                 " iallocator '%s': %s" % (self.op.iallocator,
5097
                                                           ial.info))
5098
    if len(ial.nodes) != ial.required_nodes:
5099
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5100
                                 " of nodes (%s), required %s" %
5101
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
5102
    self.op.remote_node = ial.nodes[0]
5103
    self.LogInfo("Selected new secondary for the instance: %s",
5104
                 self.op.remote_node)
5105

    
5106
  def BuildHooksEnv(self):
5107
    """Build hooks env.
5108

5109
    This runs on the master, the primary and all the secondaries.
5110

5111
    """
5112
    env = {
5113
      "MODE": self.op.mode,
5114
      "NEW_SECONDARY": self.op.remote_node,
5115
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
5116
      }
5117
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5118
    nl = [
5119
      self.cfg.GetMasterNode(),
5120
      self.instance.primary_node,
5121
      ]
5122
    if self.op.remote_node is not None:
5123
      nl.append(self.op.remote_node)
5124
    return env, nl, nl
5125

    
5126
  def CheckPrereq(self):
5127
    """Check prerequisites.
5128

5129
    This checks that the instance is in the cluster.
5130

5131
    """
5132
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5133
    assert instance is not None, \
5134
      "Cannot retrieve locked instance %s" % self.op.instance_name
5135
    self.instance = instance
5136

    
5137
    if instance.disk_template != constants.DT_DRBD8:
5138
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5139
                                 " instances")
5140

    
5141
    if len(instance.secondary_nodes) != 1:
5142
      raise errors.OpPrereqError("The instance has a strange layout,"
5143
                                 " expected one secondary but found %d" %
5144
                                 len(instance.secondary_nodes))
5145

    
5146
    self.sec_node = instance.secondary_nodes[0]
5147

    
5148
    if self.op.iallocator is not None:
5149
      self._RunAllocator()
5150

    
5151
    remote_node = self.op.remote_node
5152
    if remote_node is not None:
5153
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5154
      assert self.remote_node_info is not None, \
5155
        "Cannot retrieve locked node %s" % remote_node
5156
    else:
5157
      self.remote_node_info = None
5158
    if remote_node == instance.primary_node:
5159
      raise errors.OpPrereqError("The specified node is the primary node of"
5160
                                 " the instance.")
5161
    elif remote_node == self.sec_node:
5162
      raise errors.OpPrereqError("The specified node is already the"
5163
                                 " secondary node of the instance.")
5164

    
5165
    if self.op.mode == constants.REPLACE_DISK_PRI:
5166
      n1 = self.tgt_node = instance.primary_node
5167
      n2 = self.oth_node = self.sec_node
5168
    elif self.op.mode == constants.REPLACE_DISK_SEC:
5169
      n1 = self.tgt_node = self.sec_node
5170
      n2 = self.oth_node = instance.primary_node
5171
    elif self.op.mode == constants.REPLACE_DISK_CHG:
5172
      n1 = self.new_node = remote_node
5173
      n2 = self.oth_node = instance.primary_node
5174
      self.tgt_node = self.sec_node
5175
      _CheckNodeNotDrained(self, remote_node)
5176
    else:
5177
      raise errors.ProgrammerError("Unhandled disk replace mode")
5178

    
5179
    _CheckNodeOnline(self, n1)
5180
    _CheckNodeOnline(self, n2)
5181

    
5182
    if not self.op.disks:
5183
      self.op.disks = range(len(instance.disks))
5184

    
5185
    for disk_idx in self.op.disks:
5186
      instance.FindDisk(disk_idx)
5187

    
5188
  def _ExecD8DiskOnly(self, feedback_fn):
5189
    """Replace a disk on the primary or secondary for dbrd8.
5190

5191
    The algorithm for replace is quite complicated:
5192

5193
      1. for each disk to be replaced:
5194

5195
        1. create new LVs on the target node with unique names
5196
        1. detach old LVs from the drbd device
5197
        1. rename old LVs to name_replaced.<time_t>
5198
        1. rename new LVs to old LVs
5199
        1. attach the new LVs (with the old names now) to the drbd device
5200

5201
      1. wait for sync across all devices
5202

5203
      1. for each modified disk:
5204

5205
        1. remove old LVs (which have the name name_replaced.<time_t>)
5206

5207
    Failures are not very well handled.
5208

5209
    """
5210
    steps_total = 6
5211
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5212
    instance = self.instance
5213
    iv_names = {}
5214
    vgname = self.cfg.GetVGName()
5215
    # start of work
5216
    cfg = self.cfg
5217
    tgt_node = self.tgt_node
5218
    oth_node = self.oth_node
5219

    
5220
    # Step: check device activation
5221
    self.proc.LogStep(1, steps_total, "check device existence")
5222
    info("checking volume groups")
5223
    my_vg = cfg.GetVGName()
5224
    results = self.rpc.call_vg_list([oth_node, tgt_node])
5225
    if not results:
5226
      raise errors.OpExecError("Can't list volume groups on the nodes")
5227
    for node in oth_node, tgt_node:
5228
      res = results[node]
5229
      if res.failed or not res.data or my_vg not in res.data:
5230
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5231
                                 (my_vg, node))
5232
    for idx, dev in enumerate(instance.disks):
5233
      if idx not in self.op.disks:
5234
        continue
5235
      for node in tgt_node, oth_node:
5236
        info("checking disk/%d on %s" % (idx, node))
5237
        cfg.SetDiskID(dev, node)
5238
        result = self.rpc.call_blockdev_find(node, dev)
5239
        msg = result.RemoteFailMsg()
5240
        if not msg and not result.payload:
5241
          msg = "disk not found"
5242
        if msg:
5243
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5244
                                   (idx, node, msg))
5245

    
5246
    # Step: check other node consistency
5247
    self.proc.LogStep(2, steps_total, "check peer consistency")
5248
    for idx, dev in enumerate(instance.disks):
5249
      if idx not in self.op.disks:
5250
        continue
5251
      info("checking disk/%d consistency on %s" % (idx, oth_node))
5252
      if not _CheckDiskConsistency(self, dev, oth_node,
5253
                                   oth_node == instance.primary_node):
5254
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
5255
                                 " to replace disks on this node (%s)" %
5256
                                 (oth_node, tgt_node))
5257

    
5258
    # Step: create new storage
5259
    self.proc.LogStep(3, steps_total, "allocate new storage")
5260
    for idx, dev in enumerate(instance.disks):
5261
      if idx not in self.op.disks:
5262
        continue
5263
      size = dev.size
5264
      cfg.SetDiskID(dev, tgt_node)
5265
      lv_names = [".disk%d_%s" % (idx, suf)
5266
                  for suf in ["data", "meta"]]
5267
      names = _GenerateUniqueNames(self, lv_names)
5268
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5269
                             logical_id=(vgname, names[0]))
5270
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5271
                             logical_id=(vgname, names[1]))
5272
      new_lvs = [lv_data, lv_meta]
5273
      old_lvs = dev.children
5274
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5275
      info("creating new local storage on %s for %s" %
5276
           (tgt_node, dev.iv_name))
5277
      # we pass force_create=True to force the LVM creation
5278
      for new_lv in new_lvs:
5279
        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
5280
                        _GetInstanceInfoText(instance), False)
5281

    
5282
    # Step: for each lv, detach+rename*2+attach
5283
    self.proc.LogStep(4, steps_total, "change drbd configuration")
5284
    for dev, old_lvs, new_lvs in iv_names.itervalues():
5285
      info("detaching %s drbd from local storage" % dev.iv_name)
5286
      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
5287
      msg = result.RemoteFailMsg()
5288
      if msg:
5289
        raise errors.OpExecError("Can't detach drbd from local storage on node"
5290
                                 " %s for device %s: %s" %
5291
                                 (tgt_node, dev.iv_name, msg))
5292
      #dev.children = []
5293
      #cfg.Update(instance)
5294

    
5295
      # ok, we created the new LVs, so now we know we have the needed
5296
      # storage; as such, we proceed on the target node to rename
5297
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5298
      # using the assumption that logical_id == physical_id (which in
5299
      # turn is the unique_id on that node)
5300

    
5301
      # FIXME(iustin): use a better name for the replaced LVs
5302
      temp_suffix = int(time.time())
5303
      ren_fn = lambda d, suff: (d.physical_id[0],
5304
                                d.physical_id[1] + "_replaced-%s" % suff)
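      # Illustrative example: an old LV ("xenvg", "<uuid>.disk0_data")
      # (names assumed) is first renamed to "<uuid>.disk0_data_replaced-<t>"
      # and the freshly created LV then takes over the original name below.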
5305
      # build the rename list based on what LVs exist on the node
5306
      rlist = []
5307
      for to_ren in old_lvs:
5308
        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
5309
        if not result.RemoteFailMsg() and result.payload:
5310
          # device exists
5311
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
5312

    
5313
      info("renaming the old LVs on the target node")
5314
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5315
      msg = result.RemoteFailMsg()
5316
      if msg:
5317
        raise errors.OpExecError("Can't rename old LVs on node %s: %s" %
5318
                                 (tgt_node, msg))
5319
      # now we rename the new LVs to the old LVs
5320
      info("renaming the new LVs on the target node")
5321
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
5322
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5323
      msg = result.RemoteFailMsg()
5324
      if msg:
5325
        raise errors.OpExecError("Can't rename new LVs on node %s: %s" %
5326
                                 (tgt_node, msg))
5327

    
5328
      for old, new in zip(old_lvs, new_lvs):
5329
        new.logical_id = old.logical_id
5330
        cfg.SetDiskID(new, tgt_node)
5331

    
5332
      for disk in old_lvs:
5333
        disk.logical_id = ren_fn(disk, temp_suffix)
5334
        cfg.SetDiskID(disk, tgt_node)
5335

    
5336
      # now that the new lvs have the old name, we can add them to the device
5337
      info("adding new mirror component on %s" % tgt_node)
5338
      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
5339
      msg = result.RemoteFailMsg()
5340
      if msg:
5341
        for new_lv in new_lvs:
5342
          msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
5343
          if msg:
5344
            warning("Can't rollback device %s: %s", dev, msg,
5345
                    hint="cleanup manually the unused logical volumes")
5346
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
5347

    
5348
      dev.children = new_lvs
5349
      cfg.Update(instance)
5350

    
5351
    # Step: wait for sync
5352

    
5353
    # this can fail as the old devices are degraded and _WaitForSync
5354
    # does a combined result over all disks, so we don't check its
5355
    # return value
5356
    self.proc.LogStep(5, steps_total, "sync devices")
5357
    _WaitForSync(self, instance, unlock=True)
5358

    
5359
    # so check manually all the devices
5360
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5361
      cfg.SetDiskID(dev, instance.primary_node)
5362
      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
5363
      msg = result.RemoteFailMsg()
5364
      if not msg and not result.payload:
5365
        msg = "disk not found"
5366
      if msg:
5367
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5368
                                 (name, msg))
5369
      if result.payload[5]:
5370
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5371

    
5372
    # Step: remove old storage
5373
    self.proc.LogStep(6, steps_total, "removing old storage")
5374
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5375
      info("remove logical volumes for %s" % name)
5376
      for lv in old_lvs:
5377
        cfg.SetDiskID(lv, tgt_node)
5378
        msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
5379
        if msg:
5380
          warning("Can't remove old LV: %s" % msg,
5381
                  hint="manually remove unused LVs")
5382
          continue
5383

    
5384
  def _ExecD8Secondary(self, feedback_fn):
5385
    """Replace the secondary node for drbd8.
5386

5387
    The algorithm for replace is quite complicated:
5388
      - for all disks of the instance:
5389
        - create new LVs on the new node with same names
5390
        - shutdown the drbd device on the old secondary
5391
        - disconnect the drbd network on the primary
5392
        - create the drbd device on the new secondary
5393
        - network attach the drbd on the primary, using an artifice:
5394
          the drbd code for Attach() will connect to the network if it
5395
          finds a device which is connected to the good local disks but
5396
          not network enabled
5397
      - wait for sync across all devices
5398
      - remove all disks from the old secondary
5399

5400
    Failures are not very well handled.
5401

5402
    """
5403
    steps_total = 6
5404
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5405
    instance = self.instance
5406
    iv_names = {}
5407
    # start of work
5408
    cfg = self.cfg
5409
    old_node = self.tgt_node
5410
    new_node = self.new_node
5411
    pri_node = instance.primary_node
5412
    nodes_ip = {
5413
      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
5414
      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
5415
      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
5416
      }
5417

    
5418
    # Step: check device activation
5419
    self.proc.LogStep(1, steps_total, "check device existence")
5420
    info("checking volume groups")
5421
    my_vg = cfg.GetVGName()
5422
    results = self.rpc.call_vg_list([pri_node, new_node])
5423
    for node in pri_node, new_node:
5424
      res = results[node]
5425
      if res.failed or not res.data or my_vg not in res.data:
5426
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5427
                                 (my_vg, node))
5428
    for idx, dev in enumerate(instance.disks):
5429
      if idx not in self.op.disks:
5430
        continue
5431
      info("checking disk/%d on %s" % (idx, pri_node))
5432
      cfg.SetDiskID(dev, pri_node)
5433
      result = self.rpc.call_blockdev_find(pri_node, dev)
5434
      msg = result.RemoteFailMsg()
5435
      if not msg and not result.payload:
5436
        msg = "disk not found"
5437
      if msg:
5438
        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5439
                                 (idx, pri_node, msg))
5440

    
5441
    # Step: check other node consistency
5442
    self.proc.LogStep(2, steps_total, "check peer consistency")
5443
    for idx, dev in enumerate(instance.disks):
5444
      if idx not in self.op.disks:
5445
        continue
5446
      info("checking disk/%d consistency on %s" % (idx, pri_node))
5447
      if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
5448
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
5449
                                 " unsafe to replace the secondary" %
5450
                                 pri_node)
5451

    
5452
    # Step: create new storage
5453
    self.proc.LogStep(3, steps_total, "allocate new storage")
5454
    for idx, dev in enumerate(instance.disks):
5455
      info("adding new local storage on %s for disk/%d" %
5456
           (new_node, idx))
5457
      # we pass force_create=True to force LVM creation
5458
      for new_lv in dev.children:
5459
        _CreateBlockDev(self, new_node, instance, new_lv, True,
5460
                        _GetInstanceInfoText(instance), False)
5461

    
5462
    # Step 4: drbd minors and drbd setup changes
5463
    # after this, we must manually remove the drbd minors on both the
5464
    # error and the success paths
5465
    minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks],
5466
                                   instance.name)
5467
    logging.debug("Allocated minors %s" % (minors,))
5468
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
5469
    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
5470
      size = dev.size
5471
      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
5472
      # create new devices on new_node; note that we create two IDs:
5473
      # one without port, so the drbd will be activated without
5474
      # networking information on the new node at this stage, and one
5475
      # with network, for the later activation in step 4
5476
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
5477
      if pri_node == o_node1:
5478
        p_minor = o_minor1
5479
      else:
5480
        p_minor = o_minor2
5481

    
5482
      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
5483
      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
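      # Illustrative layout of a DRBD8 logical_id:
      #   (node_A, node_B, port, minor_A, minor_B, secret)
      # new_alone_id carries port=None so the device comes up on the new
      # node without networking; new_net_id keeps the original port and is
      # what gets written to the configuration for the later reconnect.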
5484

    
5485
      iv_names[idx] = (dev, dev.children, new_net_id)
5486
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
5487
                    new_net_id)
5488
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
5489
                              logical_id=new_alone_id,
5490
                              children=dev.children)
5491
      try:
5492
        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
5493
                              _GetInstanceInfoText(instance), False)
5494
      except errors.GenericError:
5495
        self.cfg.ReleaseDRBDMinors(instance.name)
5496
        raise
5497

    
5498
    for idx, dev in enumerate(instance.disks):
5499
      # we have new devices, shutdown the drbd on the old secondary
5500
      info("shutting down drbd for disk/%d on old node" % idx)
5501
      cfg.SetDiskID(dev, old_node)
5502
      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
5503
      if msg:
5504
        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
5505
                (idx, msg),
5506
                hint="Please cleanup this device manually as soon as possible")
5507

    
5508
    info("detaching primary drbds from the network (=> standalone)")
5509
    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
5510
                                               instance.disks)[pri_node]
5511

    
5512
    msg = result.RemoteFailMsg()
5513
    if msg:
5514
      # detaches didn't succeed (unlikely)
5515
      self.cfg.ReleaseDRBDMinors(instance.name)
5516
      raise errors.OpExecError("Can't detach the disks from the network on"
5517
                               " old node: %s" % (msg,))
5518

    
5519
    # if we managed to detach at least one, we update all the disks of
5520
    # the instance to point to the new secondary
5521
    info("updating instance configuration")
5522
    for dev, _, new_logical_id in iv_names.itervalues():
5523
      dev.logical_id = new_logical_id
5524
      cfg.SetDiskID(dev, pri_node)
5525
    cfg.Update(instance)
5526

    
5527
    # and now perform the drbd attach
5528
    info("attaching primary drbds to new secondary (standalone => connected)")
5529
    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
5530
                                           instance.disks, instance.name,
5531
                                           False)
5532
    for to_node, to_result in result.items():
5533
      msg = to_result.RemoteFailMsg()
5534
      if msg:
5535
        warning("can't attach drbd disks on node %s: %s", to_node, msg,
5536
                hint="please do a gnt-instance info to see the"
5537
                " status of disks")
5538

    
5539
    # this can fail as the old devices are degraded and _WaitForSync
5540
    # does a combined result over all disks, so we don't check its
5541
    # return value
5542
    self.proc.LogStep(5, steps_total, "sync devices")
5543
    _WaitForSync(self, instance, unlock=True)
5544

    
5545
    # so check manually all the devices
5546
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5547
      cfg.SetDiskID(dev, pri_node)
5548
      result = self.rpc.call_blockdev_find(pri_node, dev)
5549
      msg = result.RemoteFailMsg()
5550
      if not msg and not result.payload:
5551
        msg = "disk not found"
5552
      if msg:
5553
        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
5554
                                 (idx, msg))
5555
      if result.payload[5]:
5556
        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
5557

    
5558
    self.proc.LogStep(6, steps_total, "removing old storage")
5559
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5560
      info("remove logical volumes for disk/%d" % idx)
5561
      for lv in old_lvs:
5562
        cfg.SetDiskID(lv, old_node)
5563
        msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
5564
        if msg:
5565
          warning("Can't remove LV on old secondary: %s", msg,
5566
                  hint="Cleanup stale volumes by hand")
5567

    
5568
  def Exec(self, feedback_fn):
5569
    """Execute disk replacement.
5570

5571
    This dispatches the disk replacement to the appropriate handler.
5572

5573
    """
5574
    instance = self.instance
5575

    
5576
    # Activate the instance disks if we're replacing them on a down instance
5577
    if not instance.admin_up:
5578
      _StartInstanceDisks(self, instance, True)
5579

    
5580
    if self.op.mode == constants.REPLACE_DISK_CHG:
5581
      fn = self._ExecD8Secondary
5582
    else:
5583
      fn = self._ExecD8DiskOnly
5584

    
5585
    ret = fn(feedback_fn)
5586

    
5587
    # Deactivate the instance disks if we're replacing them on a down instance
5588
    if not instance.admin_up:
5589
      _SafeShutdownInstanceDisks(self, instance)
5590

    
5591
    return ret
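  # Minimal usage sketch (not part of the module): this Exec is normally
  # reached through the replace-disks client command, for example roughly
  #   gnt-instance replace-disks -n node3.example.com instance1.example.com
  # to move the DRBD secondary to node3 (mode REPLACE_DISK_CHG), while the
  # -p/-s variants only rebuild the mirrors in place; the exact flags may
  # differ between Ganeti versions.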
5592

    
5593

    
5594
class LUGrowDisk(LogicalUnit):
5595
  """Grow a disk of an instance.
5596

5597
  """
5598
  HPATH = "disk-grow"
5599
  HTYPE = constants.HTYPE_INSTANCE
5600
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
5601
  REQ_BGL = False
5602

    
5603
  def ExpandNames(self):
5604
    self._ExpandAndLockInstance()
5605
    self.needed_locks[locking.LEVEL_NODE] = []
5606
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5607

    
5608
  def DeclareLocks(self, level):
5609
    if level == locking.LEVEL_NODE:
5610
      self._LockInstancesNodes()
5611

    
5612
  def BuildHooksEnv(self):
5613
    """Build hooks env.
5614

5615
    This runs on the master, the primary and all the secondaries.
5616

5617
    """
5618
    env = {
5619
      "DISK": self.op.disk,
5620
      "AMOUNT": self.op.amount,
5621
      }
5622
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5623
    nl = [
5624
      self.cfg.GetMasterNode(),
5625
      self.instance.primary_node,
5626
      ]
5627
    return env, nl, nl
5628

    
5629
  def CheckPrereq(self):
5630
    """Check prerequisites.
5631

5632
    This checks that the instance is in the cluster.
5633

5634
    """
5635
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5636
    assert instance is not None, \
5637
      "Cannot retrieve locked instance %s" % self.op.instance_name
5638
    nodenames = list(instance.all_nodes)
5639
    for node in nodenames:
5640
      _CheckNodeOnline(self, node)
5641

    
5642

    
5643
    self.instance = instance
5644

    
5645
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
5646
      raise errors.OpPrereqError("Instance's disk layout does not support"
5647
                                 " growing.")
5648

    
5649
    self.disk = instance.FindDisk(self.op.disk)
5650

    
5651
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5652
                                       instance.hypervisor)
5653
    for node in nodenames:
5654
      info = nodeinfo[node]
5655
      if info.failed or not info.data:
5656
        raise errors.OpPrereqError("Cannot get current information"
5657
                                   " from node '%s'" % node)
5658
      vg_free = info.data.get('vg_free', None)
5659
      if not isinstance(vg_free, int):
5660
        raise errors.OpPrereqError("Can't compute free disk space on"
5661
                                   " node %s" % node)
5662
      if self.op.amount > vg_free:
5663
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
5664
                                   " %d MiB available, %d MiB required" %
5665
                                   (node, vg_free, self.op.amount))
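    # Illustrative numbers (not part of the module): growing a disk by
    # self.op.amount == 2048 MiB therefore needs at least 2048 MiB of free
    # space in the volume group on every node used by the instance (both
    # DRBD peers for a drbd instance), or the request is refused.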
5666

    
5667
  def Exec(self, feedback_fn):
5668
    """Execute disk grow.
5669

5670
    """
5671
    instance = self.instance
5672
    disk = self.disk
5673
    for node in instance.all_nodes:
5674
      self.cfg.SetDiskID(disk, node)
5675
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
5676
      msg = result.RemoteFailMsg()
5677
      if msg:
5678
        raise errors.OpExecError("Grow request failed to node %s: %s" %
5679
                                 (node, msg))
5680
    disk.RecordGrow(self.op.amount)
5681
    self.cfg.Update(instance)
5682
    if self.op.wait_for_sync:
5683
      disk_abort = not _WaitForSync(self, instance)
5684
      if disk_abort:
5685
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
5686
                             " status.\nPlease check the instance.")
5687

    
5688

    
5689
class LUQueryInstanceData(NoHooksLU):
5690
  """Query runtime instance data.
5691

5692
  """
5693
  _OP_REQP = ["instances", "static"]
5694
  REQ_BGL = False
5695

    
5696
  def ExpandNames(self):
5697
    self.needed_locks = {}
5698
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
5699

    
5700
    if not isinstance(self.op.instances, list):
5701
      raise errors.OpPrereqError("Invalid argument type 'instances'")
5702

    
5703
    if self.op.instances:
5704
      self.wanted_names = []
5705
      for name in self.op.instances:
5706
        full_name = self.cfg.ExpandInstanceName(name)
5707
        if full_name is None:
5708
          raise errors.OpPrereqError("Instance '%s' not known" % name)
5709
        self.wanted_names.append(full_name)
5710
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
5711
    else:
5712
      self.wanted_names = None
5713
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5714

    
5715
    self.needed_locks[locking.LEVEL_NODE] = []
5716
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5717

    
5718
  def DeclareLocks(self, level):
5719
    if level == locking.LEVEL_NODE:
5720
      self._LockInstancesNodes()
5721

    
5722
  def CheckPrereq(self):
5723
    """Check prerequisites.
5724

5725
    This only checks the optional instance list against the existing names.
5726

5727
    """
5728
    if self.wanted_names is None:
5729
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5730

    
5731
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
5732
                             in self.wanted_names]
5733
    return
5734

    
5735
  def _ComputeDiskStatus(self, instance, snode, dev):
5736
    """Compute block device status.
5737

5738
    """
5739
    static = self.op.static
5740
    if not static:
5741
      self.cfg.SetDiskID(dev, instance.primary_node)
5742
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
5743
      if dev_pstatus.offline:
5744
        dev_pstatus = None
5745
      else:
5746
        msg = dev_pstatus.RemoteFailMsg()
5747
        if msg:
5748
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5749
                                   (instance.name, msg))
5750
        dev_pstatus = dev_pstatus.payload
5751
    else:
5752
      dev_pstatus = None
5753

    
5754
    if dev.dev_type in constants.LDS_DRBD:
5755
      # we change the snode then (otherwise we use the one passed in)
5756
      if dev.logical_id[0] == instance.primary_node:
5757
        snode = dev.logical_id[1]
5758
      else:
5759
        snode = dev.logical_id[0]
5760

    
5761
    if snode and not static:
5762
      self.cfg.SetDiskID(dev, snode)
5763
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
5764
      if dev_sstatus.offline:
5765
        dev_sstatus = None
5766
      else:
5767
        msg = dev_sstatus.RemoteFailMsg()
5768
        if msg:
5769
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5770
                                   (instance.name, msg))
5771
        dev_sstatus = dev_sstatus.payload
5772
    else:
5773
      dev_sstatus = None
5774

    
5775
    if dev.children:
5776
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
5777
                      for child in dev.children]
5778
    else:
5779
      dev_children = []
5780

    
5781
    data = {
5782
      "iv_name": dev.iv_name,
5783
      "dev_type": dev.dev_type,
5784
      "logical_id": dev.logical_id,
5785
      "physical_id": dev.physical_id,
5786
      "pstatus": dev_pstatus,
5787
      "sstatus": dev_sstatus,
5788
      "children": dev_children,
5789
      "mode": dev.mode,
5790
      }
5791

    
5792
    return data
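  # Illustrative sketch (not part of the module): for a DRBD disk the dict
  # built above might look roughly like
  #   {"iv_name": "disk/0", "dev_type": "drbd8",
  #    "logical_id": ("node1", "node2", 11000, 0, 1, "secret"),
  #    "physical_id": <similar tuple>, "mode": "rw",
  #    "pstatus": <payload of blockdev_find on the primary, or None>,
  #    "sstatus": <payload of blockdev_find on the secondary, or None>,
  #    "children": [<same structure for each child LV>]}
  # with pstatus/sstatus left as None when only static data was requested.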
5793

    
5794
  def Exec(self, feedback_fn):
5795
    """Gather and return data"""
5796
    result = {}
5797

    
5798
    cluster = self.cfg.GetClusterInfo()
5799

    
5800
    for instance in self.wanted_instances:
5801
      if not self.op.static:
5802
        remote_info = self.rpc.call_instance_info(instance.primary_node,
5803
                                                  instance.name,
5804
                                                  instance.hypervisor)
5805
        remote_info.Raise()
5806
        remote_info = remote_info.data
5807
        if remote_info and "state" in remote_info:
5808
          remote_state = "up"
5809
        else:
5810
          remote_state = "down"
5811
      else:
5812
        remote_state = None
5813
      if instance.admin_up:
5814
        config_state = "up"
5815
      else:
5816
        config_state = "down"
5817

    
5818
      disks = [self._ComputeDiskStatus(instance, None, device)
5819
               for device in instance.disks]
5820

    
5821
      idict = {
5822
        "name": instance.name,
5823
        "config_state": config_state,
5824
        "run_state": remote_state,
5825
        "pnode": instance.primary_node,
5826
        "snodes": instance.secondary_nodes,
5827
        "os": instance.os,
5828
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
5829
        "disks": disks,
5830
        "hypervisor": instance.hypervisor,
5831
        "network_port": instance.network_port,
5832
        "hv_instance": instance.hvparams,
5833
        "hv_actual": cluster.FillHV(instance),
5834
        "be_instance": instance.beparams,
5835
        "be_actual": cluster.FillBE(instance),
5836
        }
5837

    
5838
      result[instance.name] = idict
5839

    
5840
    return result
5841

    
5842

    
5843
class LUSetInstanceParams(LogicalUnit):
5844
  """Modifies an instances's parameters.
5845

5846
  """
5847
  HPATH = "instance-modify"
5848
  HTYPE = constants.HTYPE_INSTANCE
5849
  _OP_REQP = ["instance_name"]
5850
  REQ_BGL = False
5851

    
5852
  def CheckArguments(self):
5853
    if not hasattr(self.op, 'nics'):
5854
      self.op.nics = []
5855
    if not hasattr(self.op, 'disks'):
5856
      self.op.disks = []
5857
    if not hasattr(self.op, 'beparams'):
5858
      self.op.beparams = {}
5859
    if not hasattr(self.op, 'hvparams'):
5860
      self.op.hvparams = {}
5861
    self.op.force = getattr(self.op, "force", False)
5862
    if not (self.op.nics or self.op.disks or
5863
            self.op.hvparams or self.op.beparams):
5864
      raise errors.OpPrereqError("No changes submitted")
5865

    
5866
    # Disk validation
5867
    disk_addremove = 0
5868
    for disk_op, disk_dict in self.op.disks:
5869
      if disk_op == constants.DDM_REMOVE:
5870
        disk_addremove += 1
5871
        continue
5872
      elif disk_op == constants.DDM_ADD:
5873
        disk_addremove += 1
5874
      else:
5875
        if not isinstance(disk_op, int):
5876
          raise errors.OpPrereqError("Invalid disk index")
5877
      if disk_op == constants.DDM_ADD:
5878
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
5879
        if mode not in constants.DISK_ACCESS_SET:
5880
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
5881
        size = disk_dict.get('size', None)
5882
        if size is None:
5883
          raise errors.OpPrereqError("Required disk parameter size missing")
5884
        try:
5885
          size = int(size)
5886
        except ValueError, err:
5887
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
5888
                                     str(err))
5889
        disk_dict['size'] = size
5890
      else:
5891
        # modification of disk
5892
        if 'size' in disk_dict:
5893
          raise errors.OpPrereqError("Disk size change not possible, use"
5894
                                     " grow-disk")
5895

    
5896
    if disk_addremove > 1:
5897
      raise errors.OpPrereqError("Only one disk add or remove operation"
5898
                                 " supported at a time")
5899

    
5900
    # NIC validation
5901
    nic_addremove = 0
5902
    for nic_op, nic_dict in self.op.nics:
5903
      if nic_op == constants.DDM_REMOVE:
5904
        nic_addremove += 1
5905
        continue
5906
      elif nic_op == constants.DDM_ADD:
5907
        nic_addremove += 1
5908
      else:
5909
        if not isinstance(nic_op, int):
5910
          raise errors.OpPrereqError("Invalid nic index")
5911

    
5912
      # nic_dict should be a dict
5913
      nic_ip = nic_dict.get('ip', None)
5914
      if nic_ip is not None:
5915
        if nic_ip.lower() == constants.VALUE_NONE:
5916
          nic_dict['ip'] = None
5917
        else:
5918
          if not utils.IsValidIP(nic_ip):
5919
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
5920

    
5921
      nic_bridge = nic_dict.get('bridge', None)
5922
      nic_link = nic_dict.get('link', None)
5923
      if nic_bridge and nic_link:
5924
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link' at the same time")
5925
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
5926
        nic_dict['bridge'] = None
5927
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
5928
        nic_dict['link'] = None
5929

    
5930
      if nic_op == constants.DDM_ADD:
5931
        nic_mac = nic_dict.get('mac', None)
5932
        if nic_mac is None:
5933
          nic_dict['mac'] = constants.VALUE_AUTO
5934

    
5935
      if 'mac' in nic_dict:
5936
        nic_mac = nic_dict['mac']
5937
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5938
          if not utils.IsValidMac(nic_mac):
5939
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
5940
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
5941
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
5942
                                     " modifying an existing nic")
5943

    
5944
    if nic_addremove > 1:
5945
      raise errors.OpPrereqError("Only one NIC add or remove operation"
5946
                                 " supported at a time")
5947

    
5948
  def ExpandNames(self):
5949
    self._ExpandAndLockInstance()
5950
    self.needed_locks[locking.LEVEL_NODE] = []
5951
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5952

    
5953
  def DeclareLocks(self, level):
5954
    if level == locking.LEVEL_NODE:
5955
      self._LockInstancesNodes()
5956

    
5957
  def BuildHooksEnv(self):
5958
    """Build hooks env.
5959

5960
    This runs on the master, primary and secondaries.
5961

5962
    """
5963
    args = dict()
5964
    if constants.BE_MEMORY in self.be_new:
5965
      args['memory'] = self.be_new[constants.BE_MEMORY]
5966
    if constants.BE_VCPUS in self.be_new:
5967
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
5968
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
5969
    # information at all.
5970
    if self.op.nics:
5971
      args['nics'] = []
5972
      nic_override = dict(self.op.nics)
5973
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
5974
      for idx, nic in enumerate(self.instance.nics):
5975
        if idx in nic_override:
5976
          this_nic_override = nic_override[idx]
5977
        else:
5978
          this_nic_override = {}
5979
        if 'ip' in this_nic_override:
5980
          ip = this_nic_override['ip']
5981
        else:
5982
          ip = nic.ip
5983
        if 'mac' in this_nic_override:
5984
          mac = this_nic_override['mac']
5985
        else:
5986
          mac = nic.mac
5987
        if idx in self.nic_pnew:
5988
          nicparams = self.nic_pnew[idx]
5989
        else:
5990
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
5991
        mode = nicparams[constants.NIC_MODE]
5992
        link = nicparams[constants.NIC_LINK]
5993
        args['nics'].append((ip, mac, mode, link))
5994
      if constants.DDM_ADD in nic_override:
5995
        ip = nic_override[constants.DDM_ADD].get('ip', None)
5996
        mac = nic_override[constants.DDM_ADD]['mac']
5997
        nicparams = self.nic_pnew[constants.DDM_ADD]
5998
        mode = nicparams[constants.NIC_MODE]
5999
        link = nicparams[constants.NIC_LINK]
6000
        args['nics'].append((ip, mac, mode, link))
6001
      elif constants.DDM_REMOVE in nic_override:
6002
        del args['nics'][-1]
6003

    
6004
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6005
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6006
    return env, nl, nl
6007

    
6008
  def _GetUpdatedParams(self, old_params, update_dict,
6009
                        default_values, parameter_types):
6010
    """Return the new params dict for the given params.
6011

6012
    @type old_params: dict
6013
    @param old_params: old parameters
6014
    @type update_dict: dict
6015
    @param update_dict: dict containing new parameter values,
6016
                       or constants.VALUE_DEFAULT to reset the
6017
                       parameter to its default value
6018
    @type default_values: dict
6019
    @param default_values: default values for the filled parameters
6020
    @type parameter_types: dict
6021
    @param parameter_types: dict mapping target dict keys to types
6022
                            in constants.ENFORCEABLE_TYPES
6023
    @rtype: (dict, dict)
6024
    @return: (new_parameters, filled_parameters)
6025

6026
    """
6027
    params_copy = copy.deepcopy(old_params)
6028
    for key, val in update_dict.iteritems():
6029
      if val == constants.VALUE_DEFAULT:
6030
        try:
6031
          del params_copy[key]
6032
        except KeyError:
6033
          pass
6034
      else:
6035
        params_copy[key] = val
6036
    utils.ForceDictType(params_copy, parameter_types)
6037
    params_filled = objects.FillDict(default_values, params_copy)
6038
    return (params_copy, params_filled)
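    # Illustrative sketch (not part of the module), with made-up values:
    #   old_params     = {'memory': 512, 'vcpus': 2}
    #   update_dict    = {'memory': constants.VALUE_DEFAULT,
    #                     'auto_balance': True}
    #   default_values = {'memory': 128, 'vcpus': 1, 'auto_balance': True}
    # would give roughly:
    #   new_parameters    = {'vcpus': 2, 'auto_balance': True}
    #   filled_parameters = {'memory': 128, 'vcpus': 2, 'auto_balance': True}
    # i.e. VALUE_DEFAULT drops the key so the cluster default shows through.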
6039

    
6040
  def CheckPrereq(self):
6041
    """Check prerequisites.
6042

6043
    This only checks the instance list against the existing names.
6044

6045
    """
6046
    force = self.force = self.op.force
6047

    
6048
    # checking the new params on the primary/secondary nodes
6049

    
6050
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6051
    cluster = self.cluster = self.cfg.GetClusterInfo()
6052
    assert self.instance is not None, \
6053
      "Cannot retrieve locked instance %s" % self.op.instance_name
6054
    pnode = instance.primary_node
6055
    nodelist = list(instance.all_nodes)
6056

    
6057
    # hvparams processing
6058
    if self.op.hvparams:
6059
      i_hvdict, hv_new = self._GetUpdatedParams(
6060
                             instance.hvparams, self.op.hvparams,
6061
                             cluster.hvparams[instance.hypervisor],
6062
                             constants.HVS_PARAMETER_TYPES)
6063
      # local check
6064
      hypervisor.GetHypervisor(
6065
        instance.hypervisor).CheckParameterSyntax(hv_new)
6066
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6067
      self.hv_new = hv_new # the new actual values
6068
      self.hv_inst = i_hvdict # the new dict (without defaults)
6069
    else:
6070
      self.hv_new = self.hv_inst = {}
6071

    
6072
    # beparams processing
6073
    if self.op.beparams:
6074
      i_bedict, be_new = self._GetUpdatedParams(
6075
                             instance.beparams, self.op.beparams,
6076
                             cluster.beparams[constants.PP_DEFAULT],
6077
                             constants.BES_PARAMETER_TYPES)
6078
      self.be_new = be_new # the new actual values
6079
      self.be_inst = i_bedict # the new dict (without defaults)
6080
    else:
6081
      self.be_new = self.be_inst = {}
6082

    
6083
    self.warn = []
6084

    
6085
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6086
      mem_check_list = [pnode]
6087
      if be_new[constants.BE_AUTO_BALANCE]:
6088
        # either we changed auto_balance to yes or it was from before
6089
        mem_check_list.extend(instance.secondary_nodes)
6090
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6091
                                                  instance.hypervisor)
6092
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6093
                                         instance.hypervisor)
6094
      if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
6095
        # Assume the primary node is unreachable and go ahead
6096
        self.warn.append("Can't get info from primary node %s" % pnode)
6097
      else:
6098
        if not instance_info.failed and instance_info.data:
6099
          current_mem = int(instance_info.data['memory'])
6100
        else:
6101
          # Assume instance not running
6102
          # (there is a slight race condition here, but it's not very probable,
6103
          # and we have no other way to check)
6104
          current_mem = 0
6105
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6106
                    nodeinfo[pnode].data['memory_free'])
6107
        if miss_mem > 0:
6108
          raise errors.OpPrereqError("This change will prevent the instance"
6109
                                     " from starting, due to %d MB of memory"
6110
                                     " missing on its primary node" % miss_mem)
6111

    
6112
      if be_new[constants.BE_AUTO_BALANCE]:
6113
        for node, nres in nodeinfo.iteritems():
6114
          if node not in instance.secondary_nodes:
6115
            continue
6116
          if nres.failed or not isinstance(nres.data, dict):
6117
            self.warn.append("Can't get info from secondary node %s" % node)
6118
          elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
6119
            self.warn.append("Not enough memory to failover instance to"
6120
                             " secondary node %s" % node)
6121

    
6122
    # NIC processing
6123
    self.nic_pnew = {}
6124
    self.nic_pinst = {}
6125
    for nic_op, nic_dict in self.op.nics:
6126
      if nic_op == constants.DDM_REMOVE:
6127
        if not instance.nics:
6128
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6129
        continue
6130
      if nic_op != constants.DDM_ADD:
6131
        # an existing nic
6132
        if nic_op < 0 or nic_op >= len(instance.nics):
6133
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6134
                                     " are 0 to %d" %
6135
                                     (nic_op, len(instance.nics)))
6136
        old_nic_params = instance.nics[nic_op].nicparams
6137
        old_nic_ip = instance.nics[nic_op].ip
6138
      else:
6139
        old_nic_params = {}
6140
        old_nic_ip = None
6141

    
6142
      update_params_dict = dict([(key, nic_dict[key])
6143
                                 for key in constants.NICS_PARAMETERS
6144
                                 if key in nic_dict])
6145

    
6146
      if 'bridge' in nic_dict:
6147
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6148

    
6149
      new_nic_params, new_filled_nic_params = \
6150
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6151
                                 cluster.nicparams[constants.PP_DEFAULT],
6152
                                 constants.NICS_PARAMETER_TYPES)
6153
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6154
      self.nic_pinst[nic_op] = new_nic_params
6155
      self.nic_pnew[nic_op] = new_filled_nic_params
6156
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6157

    
6158
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6159
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6160
        result = self.rpc.call_bridges_exist(pnode, [nic_bridge])
6161
        result.Raise()
6162
        if not result.data:
6163
          msg = ("Bridge '%s' doesn't exist on one of"
6164
                 " the instance nodes" % nic_bridge)
6165
          if self.force:
6166
            self.warn.append(msg)
6167
          else:
6168
            raise errors.OpPrereqError(msg)
6169
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6170
        if 'ip' in nic_dict:
6171
          nic_ip = nic_dict['ip']
6172
        else:
6173
          nic_ip = old_nic_ip
6174
        if nic_ip is None:
6175
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6176
                                     ' on a routed nic')
6177
      if 'mac' in nic_dict:
6178
        nic_mac = nic_dict['mac']
6179
        if nic_mac is None:
6180
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6181
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6182
          # otherwise generate the mac
6183
          nic_dict['mac'] = self.cfg.GenerateMAC()
6184
        else:
6185
          # or validate/reserve the current one
6186
          if self.cfg.IsMacInUse(nic_mac):
6187
            raise errors.OpPrereqError("MAC address %s already in use"
6188
                                       " in cluster" % nic_mac)
6189

    
6190
    # DISK processing
6191
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6192
      raise errors.OpPrereqError("Disk operations not supported for"
6193
                                 " diskless instances")
6194
    for disk_op, disk_dict in self.op.disks:
6195
      if disk_op == constants.DDM_REMOVE:
6196
        if len(instance.disks) == 1:
6197
          raise errors.OpPrereqError("Cannot remove the last disk of"
6198
                                     " an instance")
6199
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6200
        ins_l = ins_l[pnode]
6201
        if ins_l.failed or not isinstance(ins_l.data, list):
6202
          raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
6203
        if instance.name in ins_l.data:
6204
          raise errors.OpPrereqError("Instance is running, can't remove"
6205
                                     " disks.")
6206

    
6207
      if (disk_op == constants.DDM_ADD and
6208
          len(instance.disks) >= constants.MAX_DISKS):
6209
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6210
                                   " add more" % constants.MAX_DISKS)
6211
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6212
        # an existing disk
6213
        if disk_op < 0 or disk_op >= len(instance.disks):
6214
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6215
                                     " are 0 to %d" %
6216
                                     (disk_op, len(instance.disks)))
6217

    
6218
    return
6219

    
6220
  def Exec(self, feedback_fn):
6221
    """Modifies an instance.
6222

6223
    All parameters take effect only at the next restart of the instance.
6224

6225
    """
6226
    # Process here the warnings from CheckPrereq, as we don't have a
6227
    # feedback_fn there.
6228
    for warn in self.warn:
6229
      feedback_fn("WARNING: %s" % warn)
6230

    
6231
    result = []
6232
    instance = self.instance
6233
    cluster = self.cluster
6234
    # disk changes
6235
    for disk_op, disk_dict in self.op.disks:
6236
      if disk_op == constants.DDM_REMOVE:
6237
        # remove the last disk
6238
        device = instance.disks.pop()
6239
        device_idx = len(instance.disks)
6240
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6241
          self.cfg.SetDiskID(disk, node)
6242
          msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
6243
          if msg:
6244
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6245
                            " continuing anyway", device_idx, node, msg)
6246
        result.append(("disk/%d" % device_idx, "remove"))
6247
      elif disk_op == constants.DDM_ADD:
6248
        # add a new disk
6249
        if instance.disk_template == constants.DT_FILE:
6250
          file_driver, file_path = instance.disks[0].logical_id
6251
          file_path = os.path.dirname(file_path)
6252
        else:
6253
          file_driver = file_path = None
6254
        disk_idx_base = len(instance.disks)
6255
        new_disk = _GenerateDiskTemplate(self,
6256
                                         instance.disk_template,
6257
                                         instance.name, instance.primary_node,
6258
                                         instance.secondary_nodes,
6259
                                         [disk_dict],
6260
                                         file_path,
6261
                                         file_driver,
6262
                                         disk_idx_base)[0]
6263
        instance.disks.append(new_disk)
6264
        info = _GetInstanceInfoText(instance)
6265

    
6266
        logging.info("Creating volume %s for instance %s",
6267
                     new_disk.iv_name, instance.name)
6268
        # Note: this needs to be kept in sync with _CreateDisks
6269
        #HARDCODE
6270
        for node in instance.all_nodes:
6271
          f_create = node == instance.primary_node
6272
          try:
6273
            _CreateBlockDev(self, node, instance, new_disk,
6274
                            f_create, info, f_create)
6275
          except errors.OpExecError, err:
6276
            self.LogWarning("Failed to create volume %s (%s) on"
6277
                            " node %s: %s",
6278
                            new_disk.iv_name, new_disk, node, err)
6279
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6280
                       (new_disk.size, new_disk.mode)))
6281
      else:
6282
        # change a given disk
6283
        instance.disks[disk_op].mode = disk_dict['mode']
6284
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6285
    # NIC changes
6286
    for nic_op, nic_dict in self.op.nics:
6287
      if nic_op == constants.DDM_REMOVE:
6288
        # remove the last nic
6289
        del instance.nics[-1]
6290
        result.append(("nic.%d" % len(instance.nics), "remove"))
6291
      elif nic_op == constants.DDM_ADD:
6292
        # mac and bridge should be set by now
6293
        mac = nic_dict['mac']
6294
        ip = nic_dict.get('ip', None)
6295
        nicparams = self.nic_pinst[constants.DDM_ADD]
6296
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6297
        instance.nics.append(new_nic)
6298
        result.append(("nic.%d" % (len(instance.nics) - 1),
6299
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6300
                       (new_nic.mac, new_nic.ip,
6301
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6302
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6303
                       )))
6304
      else:
6305
        for key in 'mac', 'ip':
6306
          if key in nic_dict:
6307
            setattr(instance.nics[nic_op], key, nic_dict[key])
6308
        if nic_op in self.nic_pnew:
6309
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6310
        for key, val in nic_dict.iteritems():
6311
          result.append(("nic.%s/%d" % (key, nic_op), val))
6312

    
6313
    # hvparams changes
6314
    if self.op.hvparams:
6315
      instance.hvparams = self.hv_inst
6316
      for key, val in self.op.hvparams.iteritems():
6317
        result.append(("hv/%s" % key, val))
6318

    
6319
    # beparams changes
6320
    if self.op.beparams:
6321
      instance.beparams = self.be_inst
6322
      for key, val in self.op.beparams.iteritems():
6323
        result.append(("be/%s" % key, val))
6324

    
6325
    self.cfg.Update(instance)
6326

    
6327
    return result
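  # Illustrative sketch (not part of the module): the returned list pairs
  # each changed item with its new value, e.g. roughly
  #   [("disk/1", "add:size=1024,mode=rw"),
  #    ("nic.ip/0", "192.0.2.10"),
  #    ("be/memory", 512)]
  # which callers can present as a summary of the applied changes.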
6328

    
6329

    
6330
class LUQueryExports(NoHooksLU):
6331
  """Query the exports list
6332

6333
  """
6334
  _OP_REQP = ['nodes']
6335
  REQ_BGL = False
6336

    
6337
  def ExpandNames(self):
6338
    self.needed_locks = {}
6339
    self.share_locks[locking.LEVEL_NODE] = 1
6340
    if not self.op.nodes:
6341
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6342
    else:
6343
      self.needed_locks[locking.LEVEL_NODE] = \
6344
        _GetWantedNodes(self, self.op.nodes)
6345

    
6346
  def CheckPrereq(self):
6347
    """Check prerequisites.
6348

6349
    """
6350
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6351

    
6352
  def Exec(self, feedback_fn):
6353
    """Compute the list of all the exported system images.
6354

6355
    @rtype: dict
6356
    @return: a dictionary with the structure node->(export-list)
6357
        where export-list is a list of the instances exported on
6358
        that node.
6359

6360
    """
6361
    rpcresult = self.rpc.call_export_list(self.nodes)
6362
    result = {}
6363
    for node in rpcresult:
6364
      if rpcresult[node].failed:
6365
        result[node] = False
6366
      else:
6367
        result[node] = rpcresult[node].data
6368

    
6369
    return result
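  # Illustrative sketch (not part of the module): with one reachable and
  # one unreachable node the result might look like
  #   {"node1.example.com": ["instance1.example.com"],
  #    "node2.example.com": False}
  # i.e. nodes that failed to answer are reported as False.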
6370

    
6371

    
6372
class LUExportInstance(LogicalUnit):
6373
  """Export an instance to an image in the cluster.
6374

6375
  """
6376
  HPATH = "instance-export"
6377
  HTYPE = constants.HTYPE_INSTANCE
6378
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6379
  REQ_BGL = False
6380

    
6381
  def ExpandNames(self):
6382
    self._ExpandAndLockInstance()
6383
    # FIXME: lock only instance primary and destination node
6384
    #
6385
    # Sad but true, for now we have to lock all nodes, as we don't know where
6386
    # the previous export might be, and in this LU we search for it and
6387
    # remove it from its current node. In the future we could fix this by:
6388
    #  - making a tasklet to search (share-lock all), then create the new one,
6389
    #    then one to remove, after
6390
    #  - removing the removal operation altogether
6391
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6392

    
6393
  def DeclareLocks(self, level):
6394
    """Last minute lock declaration."""
6395
    # All nodes are locked anyway, so nothing to do here.
6396

    
6397
  def BuildHooksEnv(self):
6398
    """Build hooks env.
6399

6400
    This will run on the master, primary node and target node.
6401

6402
    """
6403
    env = {
6404
      "EXPORT_NODE": self.op.target_node,
6405
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6406
      }
6407
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6408
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6409
          self.op.target_node]
6410
    return env, nl, nl
6411

    
6412
  def CheckPrereq(self):
6413
    """Check prerequisites.
6414

6415
    This checks that the instance and node names are valid.
6416

6417
    """
6418
    instance_name = self.op.instance_name
6419
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6420
    assert self.instance is not None, \
6421
          "Cannot retrieve locked instance %s" % self.op.instance_name
6422
    _CheckNodeOnline(self, self.instance.primary_node)
6423

    
6424
    self.dst_node = self.cfg.GetNodeInfo(
6425
      self.cfg.ExpandNodeName(self.op.target_node))
6426

    
6427
    if self.dst_node is None:
6428
      # This is wrong node name, not a non-locked node
6429
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6430
    _CheckNodeOnline(self, self.dst_node.name)
6431
    _CheckNodeNotDrained(self, self.dst_node.name)
6432

    
6433
    # instance disk type verification
6434
    for disk in self.instance.disks:
6435
      if disk.dev_type == constants.LD_FILE:
6436
        raise errors.OpPrereqError("Export not supported for instances with"
6437
                                   " file-based disks")
6438

    
6439
  def Exec(self, feedback_fn):
6440
    """Export an instance to an image in the cluster.
6441

6442
    """
6443
    instance = self.instance
6444
    dst_node = self.dst_node
6445
    src_node = instance.primary_node
6446
    if self.op.shutdown:
6447
      # shutdown the instance, but not the disks
6448
      result = self.rpc.call_instance_shutdown(src_node, instance)
6449
      msg = result.RemoteFailMsg()
6450
      if msg:
6451
        raise errors.OpExecError("Could not shutdown instance %s on"
6452
                                 " node %s: %s" %
6453
                                 (instance.name, src_node, msg))
6454

    
6455
    vgname = self.cfg.GetVGName()
6456

    
6457
    snap_disks = []
6458

    
6459
    # set the disks ID correctly since call_instance_start needs the
6460
    # correct drbd minor to create the symlinks
6461
    for disk in instance.disks:
6462
      self.cfg.SetDiskID(disk, src_node)
6463

    
6464
    try:
6465
      for disk in instance.disks:
6466
        # result.payload will be a snapshot of an LVM leaf of the disk we passed
6467
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
6468
        msg = result.RemoteFailMsg()
6469
        if msg:
6470
          self.LogWarning("Could not snapshot block device %s on node %s: %s",
6471
                          disk.logical_id[1], src_node, msg)
6472
          snap_disks.append(False)
6473
        else:
6474
          disk_id = (vgname, result.payload)
6475
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6476
                                 logical_id=disk_id, physical_id=disk_id,
6477
                                 iv_name=disk.iv_name)
6478
          snap_disks.append(new_dev)
6479

    
6480
    finally:
6481
      if self.op.shutdown and instance.admin_up:
6482
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6483
        msg = result.RemoteFailMsg()
6484
        if msg:
6485
          _ShutdownInstanceDisks(self, instance)
6486
          raise errors.OpExecError("Could not start instance: %s" % msg)
6487

    
6488
    # TODO: check for size
6489

    
6490
    cluster_name = self.cfg.GetClusterName()
6491
    for idx, dev in enumerate(snap_disks):
6492
      if dev:
6493
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6494
                                               instance, cluster_name, idx)
6495
        msg = result.RemoteFailMsg()
6496
        if msg:
6497
          self.LogWarning("Could not export block device %s from node %s to"
6498
                          " node %s: %s", dev.logical_id[1], src_node,
6499
                          dst_node.name, msg)
6500
        msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
6501
        if msg:
6502
          self.LogWarning("Could not remove snapshot block device %s from node"
6503
                          " %s: %s", dev.logical_id[1], src_node, msg)
6504

    
6505
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6506
    msg = result.RemoteFailMsg()
6507
    if msg:
6508
      self.LogWarning("Could not finalize export for instance %s"
6509
                      " on node %s: %s", instance.name, dst_node.name, msg)
6510

    
6511
    nodelist = self.cfg.GetNodeList()
6512
    nodelist.remove(dst_node.name)
6513

    
6514
    # on one-node clusters nodelist will be empty after the removal
6515
    # if we proceed the backup would be removed because OpQueryExports
6516
    # substitutes an empty list with the full cluster node list.
6517
    if nodelist:
6518
      exportlist = self.rpc.call_export_list(nodelist)
6519
      for node in exportlist:
6520
        if exportlist[node].failed:
6521
          continue
6522
        if instance.name in exportlist[node].data:
6523
          if not self.rpc.call_export_remove(node, instance.name):
6524
            self.LogWarning("Could not remove older export for instance %s"
6525
                            " on node %s", instance.name, node)
6526

    
6527

    
6528
class LURemoveExport(NoHooksLU):
6529
  """Remove exports related to the named instance.
6530

6531
  """
6532
  _OP_REQP = ["instance_name"]
6533
  REQ_BGL = False
6534

    
6535
  def ExpandNames(self):
6536
    self.needed_locks = {}
6537
    # We need all nodes to be locked in order for RemoveExport to work, but we
6538
    # don't need to lock the instance itself, as nothing will happen to it (and
6539
    # we can also remove exports for a removed instance)
6540
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6541

    
6542
  def CheckPrereq(self):
6543
    """Check prerequisites.
6544
    """
6545
    pass
6546

    
6547
  def Exec(self, feedback_fn):
6548
    """Remove any export.
6549

6550
    """
6551
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6552
    # If the instance was not found, we'll try with the name that was passed in.
6553
    # This will only work if it was an FQDN, though.
6554
    fqdn_warn = False
6555
    if not instance_name:
6556
      fqdn_warn = True
6557
      instance_name = self.op.instance_name
6558

    
6559
    exportlist = self.rpc.call_export_list(self.acquired_locks[
6560
      locking.LEVEL_NODE])
6561
    found = False
6562
    for node in exportlist:
6563
      if exportlist[node].failed:
6564
        self.LogWarning("Failed to query node %s, continuing" % node)
6565
        continue
6566
      if instance_name in exportlist[node].data:
6567
        found = True
6568
        result = self.rpc.call_export_remove(node, instance_name)
6569
        if result.failed or not result.data:
6570
          logging.error("Could not remove export for instance %s"
6571
                        " on node %s", instance_name, node)
6572

    
6573
    if fqdn_warn and not found:
6574
      feedback_fn("Export not found. If trying to remove an export belonging"
6575
                  " to a deleted instance please use its Fully Qualified"
6576
                  " Domain Name.")
6577

    
6578

    
6579
class TagsLU(NoHooksLU):
6580
  """Generic tags LU.
6581

6582
  This is an abstract class which is the parent of all the other tags LUs.
6583

6584
  """
6585

    
6586
  def ExpandNames(self):
6587
    self.needed_locks = {}
6588
    if self.op.kind == constants.TAG_NODE:
6589
      name = self.cfg.ExpandNodeName(self.op.name)
6590
      if name is None:
6591
        raise errors.OpPrereqError("Invalid node name (%s)" %
6592
                                   (self.op.name,))
6593
      self.op.name = name
6594
      self.needed_locks[locking.LEVEL_NODE] = name
6595
    elif self.op.kind == constants.TAG_INSTANCE:
6596
      name = self.cfg.ExpandInstanceName(self.op.name)
6597
      if name is None:
6598
        raise errors.OpPrereqError("Invalid instance name (%s)" %
6599
                                   (self.op.name,))
6600
      self.op.name = name
6601
      self.needed_locks[locking.LEVEL_INSTANCE] = name
6602

    
6603
  def CheckPrereq(self):
6604
    """Check prerequisites.
6605

6606
    """
6607
    if self.op.kind == constants.TAG_CLUSTER:
6608
      self.target = self.cfg.GetClusterInfo()
6609
    elif self.op.kind == constants.TAG_NODE:
6610
      self.target = self.cfg.GetNodeInfo(self.op.name)
6611
    elif self.op.kind == constants.TAG_INSTANCE:
6612
      self.target = self.cfg.GetInstanceInfo(self.op.name)
6613
    else:
6614
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
6615
                                 str(self.op.kind))
6616

    
6617

    
6618
class LUGetTags(TagsLU):
6619
  """Returns the tags of a given object.
6620

6621
  """
6622
  _OP_REQP = ["kind", "name"]
6623
  REQ_BGL = False
6624

    
6625
  def Exec(self, feedback_fn):
6626
    """Returns the tag list.
6627

6628
    """
6629
    return list(self.target.GetTags())
6630

    
6631

    
6632
class LUSearchTags(NoHooksLU):
6633
  """Searches the tags for a given pattern.
6634

6635
  """
6636
  _OP_REQP = ["pattern"]
6637
  REQ_BGL = False
6638

    
6639
  def ExpandNames(self):
6640
    self.needed_locks = {}
6641

    
6642
  def CheckPrereq(self):
6643
    """Check prerequisites.
6644

6645
    This checks the pattern passed for validity by compiling it.
6646

6647
    """
6648
    try:
6649
      self.re = re.compile(self.op.pattern)
6650
    except re.error, err:
6651
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6652
                                 (self.op.pattern, err))
6653

    
6654
  def Exec(self, feedback_fn):
6655
    """Returns the tag list.
6656

6657
    """
6658
    cfg = self.cfg
6659
    tgts = [("/cluster", cfg.GetClusterInfo())]
6660
    ilist = cfg.GetAllInstancesInfo().values()
6661
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6662
    nlist = cfg.GetAllNodesInfo().values()
6663
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6664
    results = []
6665
    for path, target in tgts:
6666
      for tag in target.GetTags():
6667
        if self.re.search(tag):
6668
          results.append((path, tag))
6669
    return results
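  # Illustrative sketch (not part of the module): searching for the
  # pattern "web" could return something like
  #   [("/cluster", "webfarm"),
  #    ("/instances/instance1.example.com", "webserver"),
  #    ("/nodes/node1.example.com", "web-rack")]
  # i.e. (path, tag) pairs for every matching tag on any tagged object.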
6670

    
6671

    
6672
class LUAddTags(TagsLU):
6673
  """Sets a tag on a given object.
6674

6675
  """
6676
  _OP_REQP = ["kind", "name", "tags"]
6677
  REQ_BGL = False
6678

    
6679
  def CheckPrereq(self):
6680
    """Check prerequisites.
6681

6682
    This checks the type and length of the tag name and value.
6683

6684
    """
6685
    TagsLU.CheckPrereq(self)
6686
    for tag in self.op.tags:
6687
      objects.TaggableObject.ValidateTag(tag)
6688

    
6689
  def Exec(self, feedback_fn):
6690
    """Sets the tag.
6691

6692
    """
6693
    try:
6694
      for tag in self.op.tags:
6695
        self.target.AddTag(tag)
6696
    except errors.TagError, err:
6697
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
6698
    try:
6699
      self.cfg.Update(self.target)
6700
    except errors.ConfigurationError:
6701
      raise errors.OpRetryError("There has been a modification to the"
6702
                                " config file and the operation has been"
6703
                                " aborted. Please retry.")
6704

    
6705

    
6706
class LUDelTags(TagsLU):
6707
  """Delete a list of tags from a given object.
6708

6709
  """
6710
  _OP_REQP = ["kind", "name", "tags"]
6711
  REQ_BGL = False
6712

    
6713
  def CheckPrereq(self):
6714
    """Check prerequisites.
6715

6716
    This checks that we have the given tag.
6717

6718
    """
6719
    TagsLU.CheckPrereq(self)
6720
    for tag in self.op.tags:
6721
      objects.TaggableObject.ValidateTag(tag)
6722
    del_tags = frozenset(self.op.tags)
6723
    cur_tags = self.target.GetTags()
6724
    if not del_tags <= cur_tags:
6725
      diff_tags = del_tags - cur_tags
6726
      diff_names = ["'%s'" % tag for tag in diff_tags]
6727
      diff_names.sort()
6728
      raise errors.OpPrereqError("Tag(s) %s not found" %
6729
                                 (",".join(diff_names)))
6730

    
6731
  def Exec(self, feedback_fn):
6732
    """Remove the tag from the object.
6733

6734
    """
6735
    for tag in self.op.tags:
6736
      self.target.RemoveTag(tag)
6737
    try:
6738
      self.cfg.Update(self.target)
6739
    except errors.ConfigurationError:
6740
      raise errors.OpRetryError("There has been a modification to the"
6741
                                " config file and the operation has been"
6742
                                " aborted. Please retry.")
6743

    
6744

    
6745
class LUTestDelay(NoHooksLU):
6746
  """Sleep for a specified amount of time.
6747

6748
  This LU sleeps on the master and/or nodes for a specified amount of
6749
  time.
6750

6751
  """
6752
  _OP_REQP = ["duration", "on_master", "on_nodes"]
6753
  REQ_BGL = False
6754

    
6755
  def ExpandNames(self):
6756
    """Expand names and set required locks.
6757

6758
    This expands the node list, if any.
6759

6760
    """
6761
    self.needed_locks = {}
6762
    if self.op.on_nodes:
6763
      # _GetWantedNodes can be used here, but is not always appropriate to use
6764
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
6765
      # more information.
6766
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
6767
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
6768

    
6769
  def CheckPrereq(self):
6770
    """Check prerequisites.
6771

6772
    """
6773

    
6774
  def Exec(self, feedback_fn):
6775
    """Do the actual sleep.
6776

6777
    """
6778
    if self.op.on_master:
6779
      if not utils.TestDelay(self.op.duration):
6780
        raise errors.OpExecError("Error during master delay test")
6781
    if self.op.on_nodes:
6782
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
6783
      if not result:
6784
        raise errors.OpExecError("Complete failure from rpc call")
6785
      for node, node_result in result.items():
6786
        node_result.Raise()
6787
        if not node_result.data:
6788
          raise errors.OpExecError("Failure during rpc call to node %s,"
6789
                                   " result: %s" % (node, node_result.data))
6790

    
6791

    
6792
class IAllocator(object):
6793
  """IAllocator framework.
6794

6795
  An IAllocator instance has the following sets of attributes:
6796
    - cfg that is needed to query the cluster
6797
    - input data (all members of the _KEYS class attribute are required)
6798
    - four buffer attributes (in|out_data|text), that represent the
6799
      input (to the external script) in text and data structure format,
6800
      and the output from it, again in two formats
6801
    - the result variables from the script (success, info, nodes) for
6802
      easy usage
6803

6804
  """
6805
  _ALLO_KEYS = [
6806
    "mem_size", "disks", "disk_template",
6807
    "os", "tags", "nics", "vcpus", "hypervisor",
6808
    ]
6809
  _RELO_KEYS = [
6810
    "relocate_from",
6811
    ]
6812

    
6813
  def __init__(self, lu, mode, name, **kwargs):
6814
    self.lu = lu
6815
    # init buffer variables
6816
    self.in_text = self.out_text = self.in_data = self.out_data = None
6817
    # init all input fields so that pylint is happy
6818
    self.mode = mode
6819
    self.name = name
6820
    self.mem_size = self.disks = self.disk_template = None
6821
    self.os = self.tags = self.nics = self.vcpus = None
6822
    self.hypervisor = None
6823
    self.relocate_from = None
6824
    # computed fields
6825
    self.required_nodes = None
6826
    # init result fields
6827
    self.success = self.info = self.nodes = None
6828
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6829
      keyset = self._ALLO_KEYS
6830
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
6831
      keyset = self._RELO_KEYS
6832
    else:
6833
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
6834
                                   " IAllocator" % self.mode)
6835
    for key in kwargs:
6836
      if key not in keyset:
6837
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
6838
                                     " IAllocator" % key)
6839
      setattr(self, key, kwargs[key])
6840
    for key in keyset:
6841
      if key not in kwargs:
6842
        raise errors.ProgrammerError("Missing input parameter '%s' to"
6843
                                     " IAllocator" % key)
6844
    self._BuildInputData()
6845

    
6846
  def _ComputeClusterData(self):
6847
    """Compute the generic allocator input data.
6848

6849
    This is the data that is independent of the actual operation.
6850

6851
    """
    cfg = self.lu.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
                                           hypervisor_name)
    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise()
        if not isinstance(nresult.data, dict):
          raise errors.OpExecError("Can't get data for node %s" % nname)
        remote_info = nresult.data
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          try:
            remote_info[attr] = int(remote_info[attr])
          except ValueError, err:
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" % (nname, attr, err))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].data:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # assemble the dynamic (runtime) node data
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.lu.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.lu.rpc.call_iallocator_runner
    data = self.in_text

    result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
    result.Raise()

    if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
      raise errors.OpExecError("Invalid result from master iallocator runner")

    rcode, stdout, stderr, fail = result.data

    if rcode == constants.IARUN_NOTFOUND:
      raise errors.OpExecError("Can't find allocator '%s'" % name)
    elif rcode == constants.IARUN_FAILURE:
      raise errors.OpExecError("Instance allocator call failed: %s,"
                               " output: %s" % (fail, stdout+stderr))
    self.out_text = stdout
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]
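  # "direction" selects whether Exec returns the generated allocator input
  # (IALLOCATOR_DIR_IN) or actually runs the named allocator and returns
  # its raw output (IALLOCATOR_DIR_OUT); "mode" chooses between a test
  # allocation and a test relocation of the instance given by "name".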

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result