#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import tempfile
import re
import platform
import logging
import copy
import random

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import opcodes
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict(((i, 0) for i in locking.LEVELS))
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)
    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left purely as a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


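# Illustrative sketch (not part of the original Ganeti module): a minimal LU
# following the contract documented in LogicalUnit above - ExpandNames
# declares locks, CheckPrereq validates cluster state, Exec does the work.
# The "message" opcode field is hypothetical; real LUs are dispatched from
# their opcodes by the master processor (mcpu).
class LUExampleNoop(NoHooksLU):
  """Example no-op LU: logs a message and returns."""
  _OP_REQP = ["message"]
  REQ_BGL = False

  def ExpandNames(self):
    # No cluster objects are touched, so no locks are needed.
    self.needed_locks = {}

  def CheckPrereq(self):
    # Nothing to verify; a real LU would check cluster state here and
    # raise errors.OpPrereqError on problems.
    pass

  def Exec(self, feedback_fn):
    feedback_fn("noop: %s" % self.op.message)
    return True

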
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


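# Illustrative usage (not part of the original module): a query-type LU
# typically validates its requested output fields like this; the field names
# below are hypothetical:
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dfree", "mfree"),
#                      selected=self.op.output_fields)
#
# Any selected field matching neither set raises errors.OpPrereqError.

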
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor: string
  @param hypervisor: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env

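# Illustrative example (not part of the original module): for a hypothetical
# instance with one bridged NIC and one disk, _BuildInstanceHookEnv returns
# keys such as:
#
#   INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES, INSTANCE_OS_TYPE,
#   INSTANCE_STATUS ("up"/"down"), INSTANCE_MEMORY, INSTANCE_VCPUS,
#   INSTANCE_NIC_COUNT, INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC,
#   INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK, INSTANCE_NIC0_BRIDGE,
#   INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE,
#   plus one INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor param.
#
# The hooks runner later prefixes every key with "GANETI_".
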
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


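# Illustrative usage (not part of the original module): instance-level LUs
# defined later in this file typically build their hook environment from the
# instance object, roughly like this; the exact node lists vary per LU:
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance)
#     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
#     return env, nl, nl

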
def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: non master-candidate has old/wrong file"
                        " '%s'" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure causes
    their output to be logged in the verify output and the verification to
    fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      if msg:
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
        bad = True
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        if instance not in instanceinfo:
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
                      instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      result = self._VerifyNode(node_i, file_names, local_checksums,
                                nresult, feedback_fn, master_files,
                                node_drbd, vg_name)
      bad = bad or result

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, utils.SafeEncode(lvdata)))
        bad = True
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      if not isinstance(idata, list):
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
                    (node,))
        bad = True
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          if (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST]):
            feedback_fn("  - ERROR: node %s didn't return data for the"
                        " volume group '%s' - it is either missing or broken" %
                        (node, vg_name))
            bad = True
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
                    " from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      result =  self._VerifyInstance(instance, inst_config, node_volume,
                                     node_instance, feedback_fn, n_offline)
      bad = bad or result
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      elif pnode not in n_offline:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        elif snode not in n_offline:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))
          bad = True
        if snode in n_offline:
          inst_nodes_offline.append(snode)

      if inst_nodes_offline:
        # warn that the instance lives on offline nodes, and set bad=True
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
                    ", ".join(inst_nodes_offline))
        bad = True

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          msg = res.fail_msg
          if msg:
            if res.offline:
              # no need to warn or set fail return value
              continue
            feedback_fn("    Communication failure in hooks execution: %s" %
                        msg)
            lu_result = 1
            continue
          for script, hkr, output in res.payload:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_volume_list(nodes, vg_name)

    to_act = set()
    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


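# Illustrative example (not part of the original module): the value returned
# by LUVerifyDisks.Exec above has roughly this shape (all names hypothetical):
#
#   ({"node2.example.com": "error message from that node"},
#    ["instance3.example.com"],                 # instances needing activate-disks
#    {"instance4.example.com": [("node5.example.com", "<vg>/<lv name>")]})

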
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV

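# Illustrative sketch (not part of the original module): the helper walks the
# disk tree depth-first, so a DRBD device backed by LVM children is reported
# as lvm-based while a purely file-based disk is not. The Disk constructor
# keywords and the LD_DRBD8/LD_FILE constants below are assumptions made for
# this example only.
#
#   lv_data = objects.Disk(dev_type=constants.LD_LV, size=1024)
#   lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128)
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8,
#                       children=[lv_data, lv_meta])
#   assert _RecursiveCheckIfLVMBased(drbd)
#   file_disk = objects.Disk(dev_type=constants.LD_FILE, size=1024)
#   assert not _RecursiveCheckIfLVMBased(file_disk)
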
class LUSetClusterParams(LogicalUnit):
1466
  """Change the parameters of the cluster.
1467

1468
  """
1469
  HPATH = "cluster-modify"
1470
  HTYPE = constants.HTYPE_CLUSTER
1471
  _OP_REQP = []
1472
  REQ_BGL = False
1473

    
1474
  def CheckArguments(self):
1475
    """Check parameters
1476

1477
    """
1478
    if not hasattr(self.op, "candidate_pool_size"):
1479
      self.op.candidate_pool_size = None
1480
    if self.op.candidate_pool_size is not None:
1481
      try:
1482
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1483
      except (ValueError, TypeError), err:
1484
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1485
                                   str(err))
1486
      if self.op.candidate_pool_size < 1:
1487
        raise errors.OpPrereqError("At least one master candidate needed")
1488

    
1489
  def ExpandNames(self):
1490
    # FIXME: in the future maybe other cluster params won't require checking on
1491
    # all nodes to be modified.
1492
    self.needed_locks = {
1493
      locking.LEVEL_NODE: locking.ALL_SET,
1494
    }
1495
    self.share_locks[locking.LEVEL_NODE] = 1
1496

    
1497
  def BuildHooksEnv(self):
1498
    """Build hooks env.
1499

1500
    """
1501
    env = {
1502
      "OP_TARGET": self.cfg.GetClusterName(),
1503
      "NEW_VG_NAME": self.op.vg_name,
1504
      }
1505
    mn = self.cfg.GetMasterNode()
1506
    return env, [mn], [mn]
1507

    
1508
  def CheckPrereq(self):
1509
    """Check prerequisites.
1510

1511
    This checks whether the given params don't conflict and
1512
    if the given volume group is valid.
1513

1514
    """
1515
    if self.op.vg_name is not None and not self.op.vg_name:
1516
      instances = self.cfg.GetAllInstancesInfo().values()
1517
      for inst in instances:
1518
        for disk in inst.disks:
1519
          if _RecursiveCheckIfLVMBased(disk):
1520
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1521
                                       " lvm-based instances exist")
1522

    
1523
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1524

    
1525
    # if vg_name not None, checks given volume group on all nodes
1526
    if self.op.vg_name:
1527
      vglist = self.rpc.call_vg_list(node_list)
1528
      for node in node_list:
1529
        msg = vglist[node].fail_msg
1530
        if msg:
1531
          # ignoring down node
1532
          self.LogWarning("Error while gathering data on node %s"
1533
                          " (ignoring node): %s", node, msg)
1534
          continue
1535
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1536
                                              self.op.vg_name,
1537
                                              constants.MIN_VG_SIZE)
1538
        if vgstatus:
1539
          raise errors.OpPrereqError("Error on node '%s': %s" %
1540
                                     (node, vgstatus))
1541

    
1542
    self.cluster = cluster = self.cfg.GetClusterInfo()
1543
    # validate params changes
1544
    if self.op.beparams:
1545
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1546
      self.new_beparams = objects.FillDict(
1547
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1548

    
1549
    if self.op.nicparams:
1550
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1551
      self.new_nicparams = objects.FillDict(
1552
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1553
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1554

    
1555
    # hypervisor list/parameters
1556
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1557
    if self.op.hvparams:
1558
      if not isinstance(self.op.hvparams, dict):
1559
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1560
      for hv_name, hv_dict in self.op.hvparams.items():
1561
        if hv_name not in self.new_hvparams:
1562
          self.new_hvparams[hv_name] = hv_dict
1563
        else:
1564
          self.new_hvparams[hv_name].update(hv_dict)
1565

    
1566
    if self.op.enabled_hypervisors is not None:
1567
      self.hv_list = self.op.enabled_hypervisors
1568
    else:
1569
      self.hv_list = cluster.enabled_hypervisors
1570

    
1571
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1572
      # either the enabled list has changed, or the parameters have, validate
1573
      for hv_name, hv_params in self.new_hvparams.items():
1574
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1575
            (self.op.enabled_hypervisors and
1576
             hv_name in self.op.enabled_hypervisors)):
1577
          # either this is a new hypervisor, or its parameters have changed
1578
          hv_class = hypervisor.GetHypervisor(hv_name)
1579
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1580
          hv_class.CheckParameterSyntax(hv_params)
1581
          _CheckHVParams(self, node_list, hv_name, hv_params)
1582

    
1583
  def Exec(self, feedback_fn):
1584
    """Change the parameters of the cluster.
1585

1586
    """
1587
    if self.op.vg_name is not None:
1588
      new_volume = self.op.vg_name
1589
      if not new_volume:
1590
        new_volume = None
1591
      if new_volume != self.cfg.GetVGName():
1592
        self.cfg.SetVGName(new_volume)
1593
      else:
1594
        feedback_fn("Cluster LVM configuration already in desired"
1595
                    " state, not changing")
1596
    if self.op.hvparams:
1597
      self.cluster.hvparams = self.new_hvparams
1598
    if self.op.enabled_hypervisors is not None:
1599
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1600
    if self.op.beparams:
1601
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1602
    if self.op.nicparams:
1603
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1604

    
1605
    if self.op.candidate_pool_size is not None:
1606
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1607

    
1608
    self.cfg.Update(self.cluster)
1609

    
1610
    # we want to update nodes after the cluster so that if any errors
1611
    # happen, we have recorded and saved the cluster info
1612
    if self.op.candidate_pool_size is not None:
1613
      _AdjustCandidatePool(self)
1614

    
1615

    
1616
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)

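# Usage sketch (informational comment, not part of the original code): a plain
# redistribution covers all configured nodes, while node addition also passes
# the node that is not yet in the configuration so it receives the ancillary
# files too; new_node_name below is a placeholder for that node's name.
#
#   _RedistributeAncillaryFiles(self)
#   _RedistributeAncillaryFiles(self, additional_nodes=[new_node_name])

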
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo())
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1687
  """Sleep and poll for an instance's disk to sync.
1688

1689
  """
1690
  if not instance.disks:
1691
    return True
1692

    
1693
  if not oneshot:
1694
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1695

    
1696
  node = instance.primary_node
1697

    
1698
  for dev in instance.disks:
1699
    lu.cfg.SetDiskID(dev, node)
1700

    
1701
  retries = 0
1702
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1703
  while True:
1704
    max_time = 0
1705
    done = True
1706
    cumul_degraded = False
1707
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1708
    msg = rstats.fail_msg
1709
    if msg:
1710
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1711
      retries += 1
1712
      if retries >= 10:
1713
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1714
                                 " aborting." % node)
1715
      time.sleep(6)
1716
      continue
1717
    rstats = rstats.payload
1718
    retries = 0
1719
    for i, mstat in enumerate(rstats):
1720
      if mstat is None:
1721
        lu.LogWarning("Can't compute data for node %s/%s",
1722
                           node, instance.disks[i].iv_name)
1723
        continue
1724
      # we ignore the ldisk parameter
1725
      perc_done, est_time, is_degraded, _ = mstat
1726
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1727
      if perc_done is not None:
1728
        done = False
1729
        if est_time is not None:
1730
          rem_time = "%d estimated seconds remaining" % est_time
1731
          max_time = est_time
1732
        else:
1733
          rem_time = "no time estimate"
1734
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1735
                        (instance.disks[i].iv_name, perc_done, rem_time))
1736

    
1737
    # if we're done but degraded, let's do a few small retries, to
1738
    # make sure we see a stable and not transient situation; therefore
1739
    # we force restart of the loop
1740
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1741
      logging.info("Degraded disks found, %d retries left", degr_retries)
1742
      degr_retries -= 1
1743
      time.sleep(1)
1744
      continue
1745

    
1746
    if done or oneshot:
1747
      break
1748

    
1749
    time.sleep(min(60, max_time))
1750

    
1751
  if done:
1752
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1753
  return not cumul_degraded
1754

    
1755

    
1756
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1757
  """Check that mirrors are not degraded.
1758

1759
  The ldisk parameter, if True, will change the test from the
1760
  is_degraded attribute (which represents overall non-ok status for
1761
  the device(s)) to the ldisk (representing the local storage status).
1762

1763
  """
1764
  lu.cfg.SetDiskID(dev, node)
1765
  if ldisk:
1766
    idx = 6
1767
  else:
1768
    idx = 5
1769

    
1770
  result = True
1771
  if on_primary or dev.AssembleOnSecondary():
1772
    rstats = lu.rpc.call_blockdev_find(node, dev)
1773
    msg = rstats.fail_msg
1774
    if msg:
1775
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1776
      result = False
1777
    elif not rstats.payload:
1778
      lu.LogWarning("Can't find disk on node %s", node)
1779
      result = False
1780
    else:
1781
      result = result and (not rstats.payload[idx])
1782
  if dev.children:
1783
    for child in dev.children:
1784
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1785

    
1786
  return result
1787

    
1788

    
1789
class LUDiagnoseOS(NoHooksLU):
1790
  """Logical unit for OS diagnose/query.
1791

1792
  """
1793
  _OP_REQP = ["output_fields", "names"]
1794
  REQ_BGL = False
1795
  _FIELDS_STATIC = utils.FieldSet()
1796
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1797

    
1798
  def ExpandNames(self):
1799
    if self.op.names:
1800
      raise errors.OpPrereqError("Selective OS query not supported")
1801

    
1802
    _CheckOutputFields(static=self._FIELDS_STATIC,
1803
                       dynamic=self._FIELDS_DYNAMIC,
1804
                       selected=self.op.output_fields)
1805

    
1806
    # Lock all nodes, in shared mode
1807
    # Temporary removal of locks, should be reverted later
1808
    # TODO: reintroduce locks when they are lighter-weight
1809
    self.needed_locks = {}
1810
    #self.share_locks[locking.LEVEL_NODE] = 1
1811
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1812

    
1813
  def CheckPrereq(self):
1814
    """Check prerequisites.
1815

1816
    """
1817

    
1818
  @staticmethod
1819
  def _DiagnoseByOS(node_list, rlist):
1820
    """Remaps a per-node return list into an a per-os per-node dictionary
1821

1822
    @param node_list: a list with the names of all nodes
1823
    @param rlist: a map with node names as keys and OS objects as values
1824

1825
    @rtype: dict
1826
    @return: a dictionary with osnames as keys and as value another map, with
1827
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
1828

1829
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
1830
                                     (/srv/..., False, "invalid api")],
1831
                           "node2": [(/srv/..., True, "")]}
1832
          }
1833

1834
    """
1835
    all_os = {}
1836
    # we build here the list of nodes that didn't fail the RPC (at RPC
1837
    # level), so that nodes with a non-responding node daemon don't
1838
    # make all OSes invalid
1839
    good_nodes = [node_name for node_name in rlist
1840
                  if not rlist[node_name].fail_msg]
1841
    for node_name, nr in rlist.items():
1842
      if nr.fail_msg or not nr.payload:
1843
        continue
1844
      for name, path, status, diagnose in nr.payload:
1845
        if name not in all_os:
1846
          # build a list of nodes for this os containing empty lists
1847
          # for each node in node_list
1848
          all_os[name] = {}
1849
          for nname in good_nodes:
1850
            all_os[name][nname] = []
1851
        all_os[name][node_name].append((path, status, diagnose))
1852
    return all_os
1853

    
1854
  def Exec(self, feedback_fn):
1855
    """Compute the list of OSes.
1856

1857
    """
1858
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1859
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1860
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1861
    output = []
1862
    for os_name, os_data in pol.items():
1863
      row = []
1864
      for field in self.op.output_fields:
1865
        if field == "name":
1866
          val = os_name
1867
        elif field == "valid":
1868
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
1869
        elif field == "node_status":
1870
          # this is just a copy of the dict
1871
          val = {}
1872
          for node_name, nos_list in os_data.items():
1873
            val[node_name] = nos_list
1874
        else:
1875
          raise errors.ParameterError(field)
1876
        row.append(val)
1877
      output.append(row)
1878

    
1879
    return output
1880

    
1881

    
1882
class LURemoveNode(LogicalUnit):
1883
  """Logical unit for removing a node.
1884

1885
  """
1886
  HPATH = "node-remove"
1887
  HTYPE = constants.HTYPE_NODE
1888
  _OP_REQP = ["node_name"]
1889

    
1890
  def BuildHooksEnv(self):
1891
    """Build hooks env.
1892

1893
    This doesn't run on the target node in the pre phase as a failed
1894
    node would then be impossible to remove.
1895

1896
    """
1897
    env = {
1898
      "OP_TARGET": self.op.node_name,
1899
      "NODE_NAME": self.op.node_name,
1900
      }
1901
    all_nodes = self.cfg.GetNodeList()
1902
    all_nodes.remove(self.op.node_name)
1903
    return env, all_nodes, all_nodes
1904

    
1905
  def CheckPrereq(self):
1906
    """Check prerequisites.
1907

1908
    This checks:
1909
     - the node exists in the configuration
1910
     - it does not have primary or secondary instances
1911
     - it's not the master
1912

1913
    Any errors are signalled by raising errors.OpPrereqError.
1914

1915
    """
1916
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1917
    if node is None:
1918
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1919

    
1920
    instance_list = self.cfg.GetInstanceList()
1921

    
1922
    masternode = self.cfg.GetMasterNode()
1923
    if node.name == masternode:
1924
      raise errors.OpPrereqError("Node is the master node,"
1925
                                 " you need to failover first.")
1926

    
1927
    for instance_name in instance_list:
1928
      instance = self.cfg.GetInstanceInfo(instance_name)
1929
      if node.name in instance.all_nodes:
1930
        raise errors.OpPrereqError("Instance %s is still running on the node,"
1931
                                   " please remove first." % instance_name)
1932
    self.op.node_name = node.name
1933
    self.node = node
1934

    
1935
  def Exec(self, feedback_fn):
1936
    """Removes the node from the cluster.
1937

1938
    """
1939
    node = self.node
1940
    logging.info("Stopping the node daemon and removing configs from node %s",
1941
                 node.name)
1942

    
1943
    self.context.RemoveNode(node.name)
1944

    
1945
    result = self.rpc.call_node_leave_cluster(node.name)
1946
    msg = result.fail_msg
1947
    if msg:
1948
      self.LogWarning("Errors encountered on the remote node while leaving"
1949
                      " the cluster: %s", msg)
1950

    
1951
    # Promote nodes to master candidate as needed
1952
    _AdjustCandidatePool(self)
1953

    
1954

    
1955
class LUQueryNodes(NoHooksLU):
1956
  """Logical unit for querying nodes.
1957

1958
  """
1959
  _OP_REQP = ["output_fields", "names", "use_locking"]
1960
  REQ_BGL = False
1961
  _FIELDS_DYNAMIC = utils.FieldSet(
1962
    "dtotal", "dfree",
1963
    "mtotal", "mnode", "mfree",
1964
    "bootid",
1965
    "ctotal", "cnodes", "csockets",
1966
    )
1967

    
1968
  _FIELDS_STATIC = utils.FieldSet(
1969
    "name", "pinst_cnt", "sinst_cnt",
1970
    "pinst_list", "sinst_list",
1971
    "pip", "sip", "tags",
1972
    "serial_no",
1973
    "master_candidate",
1974
    "master",
1975
    "offline",
1976
    "drained",
1977
    )
1978

    
1979
  def ExpandNames(self):
1980
    _CheckOutputFields(static=self._FIELDS_STATIC,
1981
                       dynamic=self._FIELDS_DYNAMIC,
1982
                       selected=self.op.output_fields)
1983

    
1984
    self.needed_locks = {}
1985
    self.share_locks[locking.LEVEL_NODE] = 1
1986

    
1987
    if self.op.names:
1988
      self.wanted = _GetWantedNodes(self, self.op.names)
1989
    else:
1990
      self.wanted = locking.ALL_SET
1991

    
1992
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
1993
    self.do_locking = self.do_node_query and self.op.use_locking
1994
    if self.do_locking:
1995
      # if we don't request only static fields, we need to lock the nodes
1996
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
1997

    
1998

    
1999
  def CheckPrereq(self):
2000
    """Check prerequisites.
2001

2002
    """
2003
    # The validation of the node list is done in the _GetWantedNodes,
2004
    # if non empty, and if empty, there's no validation to do
2005
    pass
2006

    
2007
  def Exec(self, feedback_fn):
2008
    """Computes the list of nodes and their attributes.
2009

2010
    """
2011
    all_info = self.cfg.GetAllNodesInfo()
2012
    if self.do_locking:
2013
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2014
    elif self.wanted != locking.ALL_SET:
2015
      nodenames = self.wanted
2016
      missing = set(nodenames).difference(all_info.keys())
2017
      if missing:
2018
        raise errors.OpExecError(
2019
          "Some nodes were removed before retrieving their data: %s" % missing)
2020
    else:
2021
      nodenames = all_info.keys()
2022

    
2023
    nodenames = utils.NiceSort(nodenames)
2024
    nodelist = [all_info[name] for name in nodenames]
2025

    
2026
    # begin data gathering
2027

    
2028
    if self.do_node_query:
2029
      live_data = {}
2030
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2031
                                          self.cfg.GetHypervisorType())
2032
      for name in nodenames:
2033
        nodeinfo = node_data[name]
2034
        if not nodeinfo.fail_msg and nodeinfo.payload:
2035
          nodeinfo = nodeinfo.payload
2036
          fn = utils.TryConvert
2037
          live_data[name] = {
2038
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2039
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2040
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2041
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2042
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2043
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2044
            "bootid": nodeinfo.get('bootid', None),
2045
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2046
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2047
            }
2048
        else:
2049
          live_data[name] = {}
2050
    else:
2051
      live_data = dict.fromkeys(nodenames, {})
2052

    
2053
    node_to_primary = dict([(name, set()) for name in nodenames])
2054
    node_to_secondary = dict([(name, set()) for name in nodenames])
2055

    
2056
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2057
                             "sinst_cnt", "sinst_list"))
2058
    if inst_fields & frozenset(self.op.output_fields):
2059
      instancelist = self.cfg.GetInstanceList()
2060

    
2061
      for instance_name in instancelist:
2062
        inst = self.cfg.GetInstanceInfo(instance_name)
2063
        if inst.primary_node in node_to_primary:
2064
          node_to_primary[inst.primary_node].add(inst.name)
2065
        for secnode in inst.secondary_nodes:
2066
          if secnode in node_to_secondary:
2067
            node_to_secondary[secnode].add(inst.name)
2068

    
2069
    master_node = self.cfg.GetMasterNode()
2070

    
2071
    # end data gathering
2072

    
2073
    output = []
2074
    for node in nodelist:
2075
      node_output = []
2076
      for field in self.op.output_fields:
2077
        if field == "name":
2078
          val = node.name
2079
        elif field == "pinst_list":
2080
          val = list(node_to_primary[node.name])
2081
        elif field == "sinst_list":
2082
          val = list(node_to_secondary[node.name])
2083
        elif field == "pinst_cnt":
2084
          val = len(node_to_primary[node.name])
2085
        elif field == "sinst_cnt":
2086
          val = len(node_to_secondary[node.name])
2087
        elif field == "pip":
2088
          val = node.primary_ip
2089
        elif field == "sip":
2090
          val = node.secondary_ip
2091
        elif field == "tags":
2092
          val = list(node.GetTags())
2093
        elif field == "serial_no":
2094
          val = node.serial_no
2095
        elif field == "master_candidate":
2096
          val = node.master_candidate
2097
        elif field == "master":
2098
          val = node.name == master_node
2099
        elif field == "offline":
2100
          val = node.offline
2101
        elif field == "drained":
2102
          val = node.drained
2103
        elif self._FIELDS_DYNAMIC.Matches(field):
2104
          val = live_data[node.name].get(field, None)
2105
        else:
2106
          raise errors.ParameterError(field)
2107
        node_output.append(val)
2108
      output.append(node_output)
2109

    
2110
    return output
2111

    
2112

    
2113
class LUQueryNodeVolumes(NoHooksLU):
2114
  """Logical unit for getting volumes on node(s).
2115

2116
  """
2117
  _OP_REQP = ["nodes", "output_fields"]
2118
  REQ_BGL = False
2119
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2120
  _FIELDS_STATIC = utils.FieldSet("node")
2121

    
2122
  def ExpandNames(self):
2123
    _CheckOutputFields(static=self._FIELDS_STATIC,
2124
                       dynamic=self._FIELDS_DYNAMIC,
2125
                       selected=self.op.output_fields)
2126

    
2127
    self.needed_locks = {}
2128
    self.share_locks[locking.LEVEL_NODE] = 1
2129
    if not self.op.nodes:
2130
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2131
    else:
2132
      self.needed_locks[locking.LEVEL_NODE] = \
2133
        _GetWantedNodes(self, self.op.nodes)
2134

    
2135
  def CheckPrereq(self):
2136
    """Check prerequisites.
2137

2138
    This checks that the fields required are valid output fields.
2139

2140
    """
2141
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2142

    
2143
  def Exec(self, feedback_fn):
2144
    """Computes the list of nodes and their attributes.
2145

2146
    """
2147
    nodenames = self.nodes
2148
    volumes = self.rpc.call_node_volumes(nodenames)
2149

    
2150
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2151
             in self.cfg.GetInstanceList()]
2152

    
2153
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2154

    
2155
    output = []
2156
    for node in nodenames:
2157
      nresult = volumes[node]
2158
      if nresult.offline:
2159
        continue
2160
      msg = nresult.fail_msg
2161
      if msg:
2162
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2163
        continue
2164

    
2165
      node_vols = nresult.payload[:]
2166
      node_vols.sort(key=lambda vol: vol['dev'])
2167

    
2168
      for vol in node_vols:
2169
        node_output = []
2170
        for field in self.op.output_fields:
2171
          if field == "node":
2172
            val = node
2173
          elif field == "phys":
2174
            val = vol['dev']
2175
          elif field == "vg":
2176
            val = vol['vg']
2177
          elif field == "name":
2178
            val = vol['name']
2179
          elif field == "size":
2180
            val = int(float(vol['size']))
2181
          elif field == "instance":
2182
            for inst in ilist:
2183
              if node not in lv_by_node[inst]:
2184
                continue
2185
              if vol['name'] in lv_by_node[inst][node]:
2186
                val = inst.name
2187
                break
2188
            else:
2189
              val = '-'
2190
          else:
2191
            raise errors.ParameterError(field)
2192
          node_output.append(str(val))
2193

    
2194
        output.append(node_output)
2195

    
2196
    return output
2197

    
2198

    
2199
class LUAddNode(LogicalUnit):
2200
  """Logical unit for adding node to the cluster.
2201

2202
  """
2203
  HPATH = "node-add"
2204
  HTYPE = constants.HTYPE_NODE
2205
  _OP_REQP = ["node_name"]
2206

    
2207
  def BuildHooksEnv(self):
2208
    """Build hooks env.
2209

2210
    This will run on all nodes before, and on all nodes + the new node after.
2211

2212
    """
2213
    env = {
2214
      "OP_TARGET": self.op.node_name,
2215
      "NODE_NAME": self.op.node_name,
2216
      "NODE_PIP": self.op.primary_ip,
2217
      "NODE_SIP": self.op.secondary_ip,
2218
      }
2219
    nodes_0 = self.cfg.GetNodeList()
2220
    nodes_1 = nodes_0 + [self.op.node_name, ]
2221
    return env, nodes_0, nodes_1
2222

    
2223
  def CheckPrereq(self):
2224
    """Check prerequisites.
2225

2226
    This checks:
2227
     - the new node is not already in the config
2228
     - it is resolvable
2229
     - its parameters (single/dual homed) match the cluster
2230

2231
    Any errors are signalled by raising errors.OpPrereqError.
2232

2233
    """
2234
    node_name = self.op.node_name
2235
    cfg = self.cfg
2236

    
2237
    dns_data = utils.HostInfo(node_name)
2238

    
2239
    node = dns_data.name
2240
    primary_ip = self.op.primary_ip = dns_data.ip
2241
    secondary_ip = getattr(self.op, "secondary_ip", None)
2242
    if secondary_ip is None:
2243
      secondary_ip = primary_ip
2244
    if not utils.IsValidIP(secondary_ip):
2245
      raise errors.OpPrereqError("Invalid secondary IP given")
2246
    self.op.secondary_ip = secondary_ip
2247

    
2248
    node_list = cfg.GetNodeList()
2249
    if not self.op.readd and node in node_list:
2250
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2251
                                 node)
2252
    elif self.op.readd and node not in node_list:
2253
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2254

    
2255
    for existing_node_name in node_list:
2256
      existing_node = cfg.GetNodeInfo(existing_node_name)
2257

    
2258
      if self.op.readd and node == existing_node_name:
2259
        if (existing_node.primary_ip != primary_ip or
2260
            existing_node.secondary_ip != secondary_ip):
2261
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2262
                                     " address configuration as before")
2263
        continue
2264

    
2265
      if (existing_node.primary_ip == primary_ip or
2266
          existing_node.secondary_ip == primary_ip or
2267
          existing_node.primary_ip == secondary_ip or
2268
          existing_node.secondary_ip == secondary_ip):
2269
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2270
                                   " existing node %s" % existing_node.name)
2271

    
2272
    # check that the type of the node (single versus dual homed) is the
2273
    # same as for the master
2274
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2275
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2276
    newbie_singlehomed = secondary_ip == primary_ip
2277
    if master_singlehomed != newbie_singlehomed:
2278
      if master_singlehomed:
2279
        raise errors.OpPrereqError("The master has no private ip but the"
2280
                                   " new node has one")
2281
      else:
2282
        raise errors.OpPrereqError("The master has a private ip but the"
2283
                                   " new node doesn't have one")
2284

    
2285
    # checks reachability
2286
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2287
      raise errors.OpPrereqError("Node not reachable by ping")
2288

    
2289
    if not newbie_singlehomed:
2290
      # check reachability from my secondary ip to newbie's secondary ip
2291
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2292
                           source=myself.secondary_ip):
2293
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2294
                                   " based ping to noded port")
2295

    
2296
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2297
    mc_now, _ = self.cfg.GetMasterCandidateStats()
2298
    master_candidate = mc_now < cp_size
2299

    
2300
    self.new_node = objects.Node(name=node,
2301
                                 primary_ip=primary_ip,
2302
                                 secondary_ip=secondary_ip,
2303
                                 master_candidate=master_candidate,
2304
                                 offline=False, drained=False)
2305

    
2306
  def Exec(self, feedback_fn):
2307
    """Adds the new node to the cluster.
2308

2309
    """
2310
    new_node = self.new_node
2311
    node = new_node.name
2312

    
2313
    # check connectivity
2314
    result = self.rpc.call_version([node])[node]
2315
    result.Raise("Can't get version information from node %s" % node)
2316
    if constants.PROTOCOL_VERSION == result.payload:
2317
      logging.info("Communication to node %s fine, sw version %s match",
2318
                   node, result.payload)
2319
    else:
2320
      raise errors.OpExecError("Version mismatch master version %s,"
2321
                               " node version %s" %
2322
                               (constants.PROTOCOL_VERSION, result.payload))
2323

    
2324
    # setup ssh on node
2325
    logging.info("Copy ssh key to node %s", node)
2326
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2327
    keyarray = []
2328
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2329
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2330
                priv_key, pub_key]
2331

    
2332
    for i in keyfiles:
2333
      f = open(i, 'r')
2334
      try:
2335
        keyarray.append(f.read())
2336
      finally:
2337
        f.close()
2338

    
2339
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2340
                                    keyarray[2],
2341
                                    keyarray[3], keyarray[4], keyarray[5])
2342
    result.Raise("Cannot transfer ssh keys to the new node")
2343

    
2344
    # Add node to our /etc/hosts, and add key to known_hosts
2345
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2346
      utils.AddHostToEtcHosts(new_node.name)
2347

    
2348
    if new_node.secondary_ip != new_node.primary_ip:
2349
      result = self.rpc.call_node_has_ip_address(new_node.name,
2350
                                                 new_node.secondary_ip)
2351
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2352
                   prereq=True)
2353
      if not result.payload:
2354
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2355
                                 " you gave (%s). Please fix and re-run this"
2356
                                 " command." % new_node.secondary_ip)
2357

    
2358
    node_verify_list = [self.cfg.GetMasterNode()]
2359
    node_verify_param = {
2360
      'nodelist': [node],
2361
      # TODO: do a node-net-test as well?
2362
    }
2363

    
2364
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2365
                                       self.cfg.GetClusterName())
2366
    for verifier in node_verify_list:
2367
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2368
      nl_payload = result[verifier].payload['nodelist']
2369
      if nl_payload:
2370
        for failed in nl_payload:
2371
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2372
                      (verifier, nl_payload[failed]))
2373
        raise errors.OpExecError("ssh/hostname verification failed.")
2374

    
2375
    if self.op.readd:
2376
      _RedistributeAncillaryFiles(self)
2377
      self.context.ReaddNode(new_node)
2378
    else:
2379
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2380
      self.context.AddNode(new_node)
2381

    
2382

    
2383
class LUSetNodeParams(LogicalUnit):
2384
  """Modifies the parameters of a node.
2385

2386
  """
2387
  HPATH = "node-modify"
2388
  HTYPE = constants.HTYPE_NODE
2389
  _OP_REQP = ["node_name"]
2390
  REQ_BGL = False
2391

    
2392
  def CheckArguments(self):
2393
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2394
    if node_name is None:
2395
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2396
    self.op.node_name = node_name
2397
    _CheckBooleanOpField(self.op, 'master_candidate')
2398
    _CheckBooleanOpField(self.op, 'offline')
2399
    _CheckBooleanOpField(self.op, 'drained')
2400
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2401
    if all_mods.count(None) == 3:
2402
      raise errors.OpPrereqError("Please pass at least one modification")
2403
    if all_mods.count(True) > 1:
2404
      raise errors.OpPrereqError("Can't set the node into more than one"
2405
                                 " state at the same time")
2406

    
2407
  def ExpandNames(self):
2408
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2409

    
2410
  def BuildHooksEnv(self):
2411
    """Build hooks env.
2412

2413
    This runs on the master node.
2414

2415
    """
2416
    env = {
2417
      "OP_TARGET": self.op.node_name,
2418
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2419
      "OFFLINE": str(self.op.offline),
2420
      "DRAINED": str(self.op.drained),
2421
      }
2422
    nl = [self.cfg.GetMasterNode(),
2423
          self.op.node_name]
2424
    return env, nl, nl
2425

    
2426
  def CheckPrereq(self):
2427
    """Check prerequisites.
2428

2429
    This only checks the instance list against the existing names.
2430

2431
    """
2432
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2433

    
2434
    if ((self.op.master_candidate == False or self.op.offline == True or
2435
         self.op.drained == True) and node.master_candidate):
2436
      # we will demote the node from master_candidate
2437
      if self.op.node_name == self.cfg.GetMasterNode():
2438
        raise errors.OpPrereqError("The master node has to be a"
2439
                                   " master candidate, online and not drained")
2440
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2441
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2442
      if num_candidates <= cp_size:
2443
        msg = ("Not enough master candidates (desired"
2444
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2445
        if self.op.force:
2446
          self.LogWarning(msg)
2447
        else:
2448
          raise errors.OpPrereqError(msg)
2449

    
2450
    if (self.op.master_candidate == True and
2451
        ((node.offline and not self.op.offline == False) or
2452
         (node.drained and not self.op.drained == False))):
2453
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2454
                                 " to master_candidate" % node.name)
2455

    
2456
    return
2457

    
2458
  def Exec(self, feedback_fn):
2459
    """Modifies a node.
2460

2461
    """
2462
    node = self.node
2463

    
2464
    result = []
2465
    changed_mc = False
2466

    
2467
    if self.op.offline is not None:
2468
      node.offline = self.op.offline
2469
      result.append(("offline", str(self.op.offline)))
2470
      if self.op.offline == True:
2471
        if node.master_candidate:
2472
          node.master_candidate = False
2473
          changed_mc = True
2474
          result.append(("master_candidate", "auto-demotion due to offline"))
2475
        if node.drained:
2476
          node.drained = False
2477
          result.append(("drained", "clear drained status due to offline"))
2478

    
2479
    if self.op.master_candidate is not None:
2480
      node.master_candidate = self.op.master_candidate
2481
      changed_mc = True
2482
      result.append(("master_candidate", str(self.op.master_candidate)))
2483
      if self.op.master_candidate == False:
2484
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2485
        msg = rrc.fail_msg
2486
        if msg:
2487
          self.LogWarning("Node failed to demote itself: %s" % msg)
2488

    
2489
    if self.op.drained is not None:
2490
      node.drained = self.op.drained
2491
      result.append(("drained", str(self.op.drained)))
2492
      if self.op.drained == True:
2493
        if node.master_candidate:
2494
          node.master_candidate = False
2495
          changed_mc = True
2496
          result.append(("master_candidate", "auto-demotion due to drain"))
2497
        if node.offline:
2498
          node.offline = False
2499
          result.append(("offline", "clear offline status due to drain"))
2500

    
2501
    # this will trigger configuration file update, if needed
2502
    self.cfg.Update(node)
2503
    # this will trigger job queue propagation or cleanup
2504
    if changed_mc:
2505
      self.context.ReaddNode(node)
2506

    
2507
    return result
2508

    
2509

    
2510
class LUPowercycleNode(NoHooksLU):
2511
  """Powercycles a node.
2512

2513
  """
2514
  _OP_REQP = ["node_name", "force"]
2515
  REQ_BGL = False
2516

    
2517
  def CheckArguments(self):
2518
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2519
    if node_name is None:
2520
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2521
    self.op.node_name = node_name
2522
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2523
      raise errors.OpPrereqError("The node is the master and the force"
2524
                                 " parameter was not set")
2525

    
2526
  def ExpandNames(self):
2527
    """Locking for PowercycleNode.
2528

2529
    This is a last-resource option and shouldn't block on other
2530
    jobs. Therefore, we grab no locks.
2531

2532
    """
2533
    self.needed_locks = {}
2534

    
2535
  def CheckPrereq(self):
2536
    """Check prerequisites.
2537

2538
    This LU has no prereqs.
2539

2540
    """
2541
    pass
2542

    
2543
  def Exec(self, feedback_fn):
2544
    """Reboots a node.
2545

2546
    """
2547
    result = self.rpc.call_node_powercycle(self.op.node_name,
2548
                                           self.cfg.GetHypervisorType())
2549
    result.Raise("Failed to schedule the reboot")
2550
    return result.payload
2551

    
2552

    
2553
class LUQueryClusterInfo(NoHooksLU):
2554
  """Query cluster configuration.
2555

2556
  """
2557
  _OP_REQP = []
2558
  REQ_BGL = False
2559

    
2560
  def ExpandNames(self):
2561
    self.needed_locks = {}
2562

    
2563
  def CheckPrereq(self):
2564
    """No prerequsites needed for this LU.
2565

2566
    """
2567
    pass
2568

    
2569
  def Exec(self, feedback_fn):
2570
    """Return cluster config.
2571

2572
    """
2573
    cluster = self.cfg.GetClusterInfo()
2574
    result = {
2575
      "software_version": constants.RELEASE_VERSION,
2576
      "protocol_version": constants.PROTOCOL_VERSION,
2577
      "config_version": constants.CONFIG_VERSION,
2578
      "os_api_version": constants.OS_API_VERSION,
2579
      "export_version": constants.EXPORT_VERSION,
2580
      "architecture": (platform.architecture()[0], platform.machine()),
2581
      "name": cluster.cluster_name,
2582
      "master": cluster.master_node,
2583
      "default_hypervisor": cluster.default_hypervisor,
2584
      "enabled_hypervisors": cluster.enabled_hypervisors,
2585
      "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
2586
                        for hypervisor in cluster.enabled_hypervisors]),
2587
      "beparams": cluster.beparams,
2588
      "nicparams": cluster.nicparams,
2589
      "candidate_pool_size": cluster.candidate_pool_size,
2590
      "master_netdev": cluster.master_netdev,
2591
      "volume_group_name": cluster.volume_group_name,
2592
      "file_storage_dir": cluster.file_storage_dir,
2593
      }
2594

    
2595
    return result
2596

    
2597

    
2598
class LUQueryConfigValues(NoHooksLU):
2599
  """Return configuration values.
2600

2601
  """
2602
  _OP_REQP = []
2603
  REQ_BGL = False
2604
  _FIELDS_DYNAMIC = utils.FieldSet()
2605
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2606

    
2607
  def ExpandNames(self):
2608
    self.needed_locks = {}
2609

    
2610
    _CheckOutputFields(static=self._FIELDS_STATIC,
2611
                       dynamic=self._FIELDS_DYNAMIC,
2612
                       selected=self.op.output_fields)
2613

    
2614
  def CheckPrereq(self):
2615
    """No prerequisites.
2616

2617
    """
2618
    pass
2619

    
2620
  def Exec(self, feedback_fn):
2621
    """Dump a representation of the cluster config to the standard output.
2622

2623
    """
2624
    values = []
2625
    for field in self.op.output_fields:
2626
      if field == "cluster_name":
2627
        entry = self.cfg.GetClusterName()
2628
      elif field == "master_node":
2629
        entry = self.cfg.GetMasterNode()
2630
      elif field == "drain_flag":
2631
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2632
      else:
2633
        raise errors.ParameterError(field)
2634
      values.append(entry)
2635
    return values
2636

    
2637

    
2638
class LUActivateInstanceDisks(NoHooksLU):
2639
  """Bring up an instance's disks.
2640

2641
  """
2642
  _OP_REQP = ["instance_name"]
2643
  REQ_BGL = False
2644

    
2645
  def ExpandNames(self):
2646
    self._ExpandAndLockInstance()
2647
    self.needed_locks[locking.LEVEL_NODE] = []
2648
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2649

    
2650
  def DeclareLocks(self, level):
2651
    if level == locking.LEVEL_NODE:
2652
      self._LockInstancesNodes()
2653

    
2654
  def CheckPrereq(self):
2655
    """Check prerequisites.
2656

2657
    This checks that the instance is in the cluster.
2658

2659
    """
2660
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2661
    assert self.instance is not None, \
2662
      "Cannot retrieve locked instance %s" % self.op.instance_name
2663
    _CheckNodeOnline(self, self.instance.primary_node)
2664

    
2665
  def Exec(self, feedback_fn):
2666
    """Activate the disks.
2667

2668
    """
2669
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2670
    if not disks_ok:
2671
      raise errors.OpExecError("Cannot activate block devices")
2672

    
2673
    return disks_info
2674

    
2675

    
2676
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2677
  """Prepare the block devices for an instance.
2678

2679
  This sets up the block devices on all nodes.
2680

2681
  @type lu: L{LogicalUnit}
2682
  @param lu: the logical unit on whose behalf we execute
2683
  @type instance: L{objects.Instance}
2684
  @param instance: the instance for whose disks we assemble
2685
  @type ignore_secondaries: boolean
2686
  @param ignore_secondaries: if true, errors on secondary nodes
2687
      won't result in an error return from the function
2688
  @return: False if the operation failed, otherwise a list of
2689
      (host, instance_visible_name, node_visible_name)
2690
      with the mapping from node devices to instance devices
2691

2692
  """
2693
  device_info = []
2694
  disks_ok = True
2695
  iname = instance.name
2696
  # With the two passes mechanism we try to reduce the window of
2697
  # opportunity for the race condition of switching DRBD to primary
2698
  # before handshaking occurred, but we do not eliminate it
2699

    
2700
  # The proper fix would be to wait (with some limits) until the
2701
  # connection has been made and drbd transitions from WFConnection
2702
  # into any other network-connected state (Connected, SyncTarget,
2703
  # SyncSource, etc.)
2704

    
2705
  # 1st pass, assemble on all nodes in secondary mode
2706
  for inst_disk in instance.disks:
2707
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2708
      lu.cfg.SetDiskID(node_disk, node)
2709
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2710
      msg = result.fail_msg
2711
      if msg:
2712
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2713
                           " (is_primary=False, pass=1): %s",
2714
                           inst_disk.iv_name, node, msg)
2715
        if not ignore_secondaries:
2716
          disks_ok = False
2717

    
2718
  # FIXME: race condition on drbd migration to primary
2719

    
2720
  # 2nd pass, do only the primary node
2721
  for inst_disk in instance.disks:
2722
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2723
      if node != instance.primary_node:
2724
        continue
2725
      lu.cfg.SetDiskID(node_disk, node)
2726
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2727
      msg = result.fail_msg
2728
      if msg:
2729
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2730
                           " (is_primary=True, pass=2): %s",
2731
                           inst_disk.iv_name, node, msg)
2732
        disks_ok = False
2733
    device_info.append((instance.primary_node, inst_disk.iv_name,
2734
                        result.payload))
2735

    
2736
  # leave the disks configured for the primary node
2737
  # this is a workaround that would be fixed better by
2738
  # improving the logical/physical id handling
2739
  for disk in instance.disks:
2740
    lu.cfg.SetDiskID(disk, instance.primary_node)
2741

    
2742
  return disks_ok, device_info
2743

    
2744

    
2745
def _StartInstanceDisks(lu, instance, force):
2746
  """Start the disks of an instance.
2747

2748
  """
2749
  disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
2750
                                           ignore_secondaries=force)
2751
  if not disks_ok:
2752
    _ShutdownInstanceDisks(lu, instance)
2753
    if force is not None and not force:
2754
      lu.proc.LogWarning("", hint="If the message above refers to a"
2755
                         " secondary node,"
2756
                         " you can retry the operation using '--force'.")
2757
    raise errors.OpExecError("Disk consistency error")
2758

    
2759

    
2760
class LUDeactivateInstanceDisks(NoHooksLU):
2761
  """Shutdown an instance's disks.
2762

2763
  """
2764
  _OP_REQP = ["instance_name"]
2765
  REQ_BGL = False
2766

    
2767
  def ExpandNames(self):
2768
    self._ExpandAndLockInstance()
2769
    self.needed_locks[locking.LEVEL_NODE] = []
2770
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2771

    
2772
  def DeclareLocks(self, level):
2773
    if level == locking.LEVEL_NODE:
2774
      self._LockInstancesNodes()
2775

    
2776
  def CheckPrereq(self):
2777
    """Check prerequisites.
2778

2779
    This checks that the instance is in the cluster.
2780

2781
    """
2782
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2783
    assert self.instance is not None, \
2784
      "Cannot retrieve locked instance %s" % self.op.instance_name
2785

    
2786
  def Exec(self, feedback_fn):
2787
    """Deactivate the disks
2788

2789
    """
2790
    instance = self.instance
2791
    _SafeShutdownInstanceDisks(self, instance)
2792

    
2793

    
2794
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s" % pnode)

  if instance.name in ins_l.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored and
  only failures on the secondary nodes mark the shutdown as failed.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem))
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem))

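# Usage sketch (informational comment, not part of the original code): callers
# normally pass the amount of memory the pending operation needs, e.g. the
# instance's filled backend memory parameter when starting it, as done in
# LUStartupInstance below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)

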
class LUStartupInstance(LogicalUnit):
2870
  """Starts an instance.
2871

2872
  """
2873
  HPATH = "instance-start"
2874
  HTYPE = constants.HTYPE_INSTANCE
2875
  _OP_REQP = ["instance_name", "force"]
2876
  REQ_BGL = False
2877

    
2878
  def ExpandNames(self):
2879
    self._ExpandAndLockInstance()
2880

    
2881
  def BuildHooksEnv(self):
2882
    """Build hooks env.
2883

2884
    This runs on master, primary and secondary nodes of the instance.
2885

2886
    """
2887
    env = {
2888
      "FORCE": self.op.force,
2889
      }
2890
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2891
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2892
    return env, nl, nl
2893

    
2894
  def CheckPrereq(self):
2895
    """Check prerequisites.
2896

2897
    This checks that the instance is in the cluster.
2898

2899
    """
2900
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2901
    assert self.instance is not None, \
2902
      "Cannot retrieve locked instance %s" % self.op.instance_name
2903

    
2904
    # extra beparams
2905
    self.beparams = getattr(self.op, "beparams", {})
2906
    if self.beparams:
2907
      if not isinstance(self.beparams, dict):
2908
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
2909
                                   " dict" % (type(self.beparams), ))
2910
      # fill the beparams dict
2911
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
2912
      self.op.beparams = self.beparams
2913

    
2914
    # extra hvparams
2915
    self.hvparams = getattr(self.op, "hvparams", {})
2916
    if self.hvparams:
2917
      if not isinstance(self.hvparams, dict):
2918
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
2919
                                   " dict" % (type(self.hvparams), ))
2920

    
2921
      # check hypervisor parameter syntax (locally)
2922
      cluster = self.cfg.GetClusterInfo()
2923
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
2924
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
2925
                                    instance.hvparams)
2926
      filled_hvp.update(self.hvparams)
2927
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
2928
      hv_type.CheckParameterSyntax(filled_hvp)
2929
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
2930
      self.op.hvparams = self.hvparams
2931

    
2932
    _CheckNodeOnline(self, instance.primary_node)
2933

    
2934
    bep = self.cfg.GetClusterInfo().FillBE(instance)
2935
    # check bridges existence
2936
    _CheckInstanceBridgesExist(self, instance)
2937

    
2938
    remote_info = self.rpc.call_instance_info(instance.primary_node,
2939
                                              instance.name,
2940
                                              instance.hypervisor)
2941
    remote_info.Raise("Error checking node %s" % instance.primary_node,
2942
                      prereq=True)
2943
    if not remote_info.payload: # not running already
2944
      _CheckNodeFreeMemory(self, instance.primary_node,
2945
                           "starting instance %s" % instance.name,
2946
                           bep[constants.BE_MEMORY], instance.hypervisor)
2947

    
2948
  def Exec(self, feedback_fn):
2949
    """Start the instance.
2950

2951
    """
2952
    instance = self.instance
2953
    force = self.op.force
2954

    
2955
    self.cfg.MarkInstanceUp(instance.name)
2956

    
2957
    node_current = instance.primary_node
2958

    
2959
    _StartInstanceDisks(self, instance, force)
2960

    
2961
    result = self.rpc.call_instance_start(node_current, instance,
2962
                                          self.hvparams, self.beparams)
2963
    msg = result.fail_msg
2964
    if msg:
2965
      _ShutdownInstanceDisks(self, instance)
2966
      raise errors.OpExecError("Could not start instance: %s" % msg)
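
# Illustrative sketch (not part of the original module): the start sequence
# above follows a "record intent, prepare disks, act, roll back on failure"
# ordering.  The helper below mirrors that ordering with plain callables; all
# parameter names are hypothetical stand-ins for the config/RPC helpers used
# in LUStartupInstance.Exec.
def _SketchStartWithRollback(mark_up, start_disks, start_instance,
                             shutdown_disks):
  mark_up()                    # persist the intended "up" state first
  start_disks()                # assemble the instance's disks
  error = start_instance()     # then ask the node to start the instance
  if error:
    shutdown_disks()           # roll back the disk activation on failure
    raise RuntimeError("Could not start instance: %s" % error)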
2967

    
2968

    
2969
class LURebootInstance(LogicalUnit):
2970
  """Reboot an instance.
2971

2972
  """
2973
  HPATH = "instance-reboot"
2974
  HTYPE = constants.HTYPE_INSTANCE
2975
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
2976
  REQ_BGL = False
2977

    
2978
  def ExpandNames(self):
2979
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
2980
                                   constants.INSTANCE_REBOOT_HARD,
2981
                                   constants.INSTANCE_REBOOT_FULL]:
2982
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
2983
                                  (constants.INSTANCE_REBOOT_SOFT,
2984
                                   constants.INSTANCE_REBOOT_HARD,
2985
                                   constants.INSTANCE_REBOOT_FULL))
2986
    self._ExpandAndLockInstance()
2987

    
2988
  def BuildHooksEnv(self):
2989
    """Build hooks env.
2990

2991
    This runs on master, primary and secondary nodes of the instance.
2992

2993
    """
2994
    env = {
2995
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
2996
      "REBOOT_TYPE": self.op.reboot_type,
2997
      }
2998
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2999
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3000
    return env, nl, nl
3001

    
3002
  def CheckPrereq(self):
3003
    """Check prerequisites.
3004

3005
    This checks that the instance is in the cluster.
3006

3007
    """
3008
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3009
    assert self.instance is not None, \
3010
      "Cannot retrieve locked instance %s" % self.op.instance_name
3011

    
3012
    _CheckNodeOnline(self, instance.primary_node)
3013

    
3014
    # check bridges existence
3015
    _CheckInstanceBridgesExist(self, instance)
3016

    
3017
  def Exec(self, feedback_fn):
3018
    """Reboot the instance.
3019

3020
    """
3021
    instance = self.instance
3022
    ignore_secondaries = self.op.ignore_secondaries
3023
    reboot_type = self.op.reboot_type
3024

    
3025
    node_current = instance.primary_node
3026

    
3027
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3028
                       constants.INSTANCE_REBOOT_HARD]:
3029
      for disk in instance.disks:
3030
        self.cfg.SetDiskID(disk, node_current)
3031
      result = self.rpc.call_instance_reboot(node_current, instance,
3032
                                             reboot_type)
3033
      result.Raise("Could not reboot instance")
3034
    else:
3035
      result = self.rpc.call_instance_shutdown(node_current, instance)
3036
      result.Raise("Could not shutdown instance for full reboot")
3037
      _ShutdownInstanceDisks(self, instance)
3038
      _StartInstanceDisks(self, instance, ignore_secondaries)
3039
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3040
      msg = result.fail_msg
3041
      if msg:
3042
        _ShutdownInstanceDisks(self, instance)
3043
        raise errors.OpExecError("Could not start instance for"
3044
                                 " full reboot: %s" % msg)
3045

    
3046
    self.cfg.MarkInstanceUp(instance.name)
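
# Illustrative sketch (not part of the original module): the reboot handling
# above has two branches - soft and hard reboots are delegated to a single
# hypervisor reboot call, while a "full" reboot is emulated as shutdown plus
# disk restart plus start.  The string constants below merely stand in for
# constants.INSTANCE_REBOOT_*.
def _SketchRebootPlan(reboot_type):
  """Return the ordered steps a given reboot type implies."""
  if reboot_type in ("soft", "hard"):
    return ["instance_reboot"]
  elif reboot_type == "full":
    return ["instance_shutdown", "shutdown_disks",
            "start_disks", "instance_start"]
  raise ValueError("reboot type not in [soft, hard, full]")

# Example: a full reboot expands to four separate steps.
assert len(_SketchRebootPlan("full")) == 4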
3047

    
3048

    
3049
class LUShutdownInstance(LogicalUnit):
3050
  """Shutdown an instance.
3051

3052
  """
3053
  HPATH = "instance-stop"
3054
  HTYPE = constants.HTYPE_INSTANCE
3055
  _OP_REQP = ["instance_name"]
3056
  REQ_BGL = False
3057

    
3058
  def ExpandNames(self):
3059
    self._ExpandAndLockInstance()
3060

    
3061
  def BuildHooksEnv(self):
3062
    """Build hooks env.
3063

3064
    This runs on master, primary and secondary nodes of the instance.
3065

3066
    """
3067
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3068
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3069
    return env, nl, nl
3070

    
3071
  def CheckPrereq(self):
3072
    """Check prerequisites.
3073

3074
    This checks that the instance is in the cluster.
3075

3076
    """
3077
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3078
    assert self.instance is not None, \
3079
      "Cannot retrieve locked instance %s" % self.op.instance_name
3080
    _CheckNodeOnline(self, self.instance.primary_node)
3081

    
3082
  def Exec(self, feedback_fn):
3083
    """Shutdown the instance.
3084

3085
    """
3086
    instance = self.instance
3087
    node_current = instance.primary_node
3088
    self.cfg.MarkInstanceDown(instance.name)
3089
    result = self.rpc.call_instance_shutdown(node_current, instance)
3090
    msg = result.fail_msg
3091
    if msg:
3092
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3093

    
3094
    _ShutdownInstanceDisks(self, instance)
3095

    
3096

    
3097
class LUReinstallInstance(LogicalUnit):
3098
  """Reinstall an instance.
3099

3100
  """
3101
  HPATH = "instance-reinstall"
3102
  HTYPE = constants.HTYPE_INSTANCE
3103
  _OP_REQP = ["instance_name"]
3104
  REQ_BGL = False
3105

    
3106
  def ExpandNames(self):
3107
    self._ExpandAndLockInstance()
3108

    
3109
  def BuildHooksEnv(self):
3110
    """Build hooks env.
3111

3112
    This runs on master, primary and secondary nodes of the instance.
3113

3114
    """
3115
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3116
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3117
    return env, nl, nl
3118

    
3119
  def CheckPrereq(self):
3120
    """Check prerequisites.
3121

3122
    This checks that the instance is in the cluster and is not running.
3123

3124
    """
3125
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3126
    assert instance is not None, \
3127
      "Cannot retrieve locked instance %s" % self.op.instance_name
3128
    _CheckNodeOnline(self, instance.primary_node)
3129

    
3130
    if instance.disk_template == constants.DT_DISKLESS:
3131
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3132
                                 self.op.instance_name)
3133
    if instance.admin_up:
3134
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3135
                                 self.op.instance_name)
3136
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3137
                                              instance.name,
3138
                                              instance.hypervisor)
3139
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3140
                      prereq=True)
3141
    if remote_info.payload:
3142
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3143
                                 (self.op.instance_name,
3144
                                  instance.primary_node))
3145

    
3146
    self.op.os_type = getattr(self.op, "os_type", None)
3147
    if self.op.os_type is not None:
3148
      # OS verification
3149
      pnode = self.cfg.GetNodeInfo(
3150
        self.cfg.ExpandNodeName(instance.primary_node))
3151
      if pnode is None:
3152
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3153
                                   instance.primary_node)
3154
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3155
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3156
                   (self.op.os_type, pnode.name), prereq=True)
3157

    
3158
    self.instance = instance
3159

    
3160
  def Exec(self, feedback_fn):
3161
    """Reinstall the instance.
3162

3163
    """
3164
    inst = self.instance
3165

    
3166
    if self.op.os_type is not None:
3167
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3168
      inst.os = self.op.os_type
3169
      self.cfg.Update(inst)
3170

    
3171
    _StartInstanceDisks(self, inst, None)
3172
    try:
3173
      feedback_fn("Running the instance OS create scripts...")
3174
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3175
      result.Raise("Could not install OS for instance %s on node %s" %
3176
                   (inst.name, inst.primary_node))
3177
    finally:
3178
      _ShutdownInstanceDisks(self, inst)
3179

    
3180

    
3181
class LURenameInstance(LogicalUnit):
3182
  """Rename an instance.
3183

3184
  """
3185
  HPATH = "instance-rename"
3186
  HTYPE = constants.HTYPE_INSTANCE
3187
  _OP_REQP = ["instance_name", "new_name"]
3188

    
3189
  def BuildHooksEnv(self):
3190
    """Build hooks env.
3191

3192
    This runs on master, primary and secondary nodes of the instance.
3193

3194
    """
3195
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3196
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3197
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3198
    return env, nl, nl
3199

    
3200
  def CheckPrereq(self):
3201
    """Check prerequisites.
3202

3203
    This checks that the instance is in the cluster and is not running.
3204

3205
    """
3206
    instance = self.cfg.GetInstanceInfo(
3207
      self.cfg.ExpandInstanceName(self.op.instance_name))
3208
    if instance is None:
3209
      raise errors.OpPrereqError("Instance '%s' not known" %
3210
                                 self.op.instance_name)
3211
    _CheckNodeOnline(self, instance.primary_node)
3212

    
3213
    if instance.admin_up:
3214
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3215
                                 self.op.instance_name)
3216
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3217
                                              instance.name,
3218
                                              instance.hypervisor)
3219
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3220
                      prereq=True)
3221
    if remote_info.payload:
3222
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3223
                                 (self.op.instance_name,
3224
                                  instance.primary_node))
3225
    self.instance = instance
3226

    
3227
    # new name verification
3228
    name_info = utils.HostInfo(self.op.new_name)
3229

    
3230
    self.op.new_name = new_name = name_info.name
3231
    instance_list = self.cfg.GetInstanceList()
3232
    if new_name in instance_list:
3233
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3234
                                 new_name)
3235

    
3236
    if not getattr(self.op, "ignore_ip", False):
3237
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3238
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3239
                                   (name_info.ip, new_name))
3240

    
3241

    
3242
  def Exec(self, feedback_fn):
3243
    """Rename the instance.
3244

3245
    """
3246
    inst = self.instance
3247
    old_name = inst.name
3248

    
3249
    if inst.disk_template == constants.DT_FILE:
3250
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3251

    
3252
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3253
    # Change the instance lock. This is definitely safe while we hold the BGL
3254
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3255
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3256

    
3257
    # re-read the instance from the configuration after rename
3258
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3259

    
3260
    if inst.disk_template == constants.DT_FILE:
3261
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3262
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3263
                                                     old_file_storage_dir,
3264
                                                     new_file_storage_dir)
3265
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3266
                   " (but the instance has been renamed in Ganeti)" %
3267
                   (inst.primary_node, old_file_storage_dir,
3268
                    new_file_storage_dir))
3269

    
3270
    _StartInstanceDisks(self, inst, None)
3271
    try:
3272
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3273
                                                 old_name)
3274
      msg = result.fail_msg
3275
      if msg:
3276
        msg = ("Could not run OS rename script for instance %s on node %s"
3277
               " (but the instance has been renamed in Ganeti): %s" %
3278
               (inst.name, inst.primary_node, msg))
3279
        self.proc.LogWarning(msg)
3280
    finally:
3281
      _ShutdownInstanceDisks(self, inst)
3282

    
3283

    
3284
class LURemoveInstance(LogicalUnit):
3285
  """Remove an instance.
3286

3287
  """
3288
  HPATH = "instance-remove"
3289
  HTYPE = constants.HTYPE_INSTANCE
3290
  _OP_REQP = ["instance_name", "ignore_failures"]
3291
  REQ_BGL = False
3292

    
3293
  def ExpandNames(self):
3294
    self._ExpandAndLockInstance()
3295
    self.needed_locks[locking.LEVEL_NODE] = []
3296
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3297

    
3298
  def DeclareLocks(self, level):
3299
    if level == locking.LEVEL_NODE:
3300
      self._LockInstancesNodes()
3301

    
3302
  def BuildHooksEnv(self):
3303
    """Build hooks env.
3304

3305
    This runs on master, primary and secondary nodes of the instance.
3306

3307
    """
3308
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3309
    nl = [self.cfg.GetMasterNode()]
3310
    return env, nl, nl
3311

    
3312
  def CheckPrereq(self):
3313
    """Check prerequisites.
3314

3315
    This checks that the instance is in the cluster.
3316

3317
    """
3318
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3319
    assert self.instance is not None, \
3320
      "Cannot retrieve locked instance %s" % self.op.instance_name
3321

    
3322
  def Exec(self, feedback_fn):
3323
    """Remove the instance.
3324

3325
    """
3326
    instance = self.instance
3327
    logging.info("Shutting down instance %s on node %s",
3328
                 instance.name, instance.primary_node)
3329

    
3330
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3331
    msg = result.fail_msg
3332
    if msg:
3333
      if self.op.ignore_failures:
3334
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3335
      else:
3336
        raise errors.OpExecError("Could not shutdown instance %s on"
3337
                                 " node %s: %s" %
3338
                                 (instance.name, instance.primary_node, msg))
3339

    
3340
    logging.info("Removing block devices for instance %s", instance.name)
3341

    
3342
    if not _RemoveDisks(self, instance):
3343
      if self.op.ignore_failures:
3344
        feedback_fn("Warning: can't remove instance's disks")
3345
      else:
3346
        raise errors.OpExecError("Can't remove instance's disks")
3347

    
3348
    logging.info("Removing instance %s out of cluster config", instance.name)
3349

    
3350
    self.cfg.RemoveInstance(instance.name)
3351
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3352

    
3353

    
3354
class LUQueryInstances(NoHooksLU):
3355
  """Logical unit for querying instances.
3356

3357
  """
3358
  _OP_REQP = ["output_fields", "names", "use_locking"]
3359
  REQ_BGL = False
3360
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3361
                                    "admin_state",
3362
                                    "disk_template", "ip", "mac", "bridge",
3363
                                    "sda_size", "sdb_size", "vcpus", "tags",
3364
                                    "network_port", "beparams",
3365
                                    r"(disk)\.(size)/([0-9]+)",
3366
                                    r"(disk)\.(sizes)", "disk_usage",
3367
                                    r"(nic)\.(mac|ip|bridge)/([0-9]+)",
3368
                                    r"(nic)\.(macs|ips|bridges)",
3369
                                    r"(disk|nic)\.(count)",
3370
                                    "serial_no", "hypervisor", "hvparams",] +
3371
                                  ["hv/%s" % name
3372
                                   for name in constants.HVS_PARAMETERS] +
3373
                                  ["be/%s" % name
3374
                                   for name in constants.BES_PARAMETERS])
3375
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3376

    
3377

    
3378
  def ExpandNames(self):
3379
    _CheckOutputFields(static=self._FIELDS_STATIC,
3380
                       dynamic=self._FIELDS_DYNAMIC,
3381
                       selected=self.op.output_fields)
3382

    
3383
    self.needed_locks = {}
3384
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3385
    self.share_locks[locking.LEVEL_NODE] = 1
3386

    
3387
    if self.op.names:
3388
      self.wanted = _GetWantedInstances(self, self.op.names)
3389
    else:
3390
      self.wanted = locking.ALL_SET
3391

    
3392
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3393
    self.do_locking = self.do_node_query and self.op.use_locking
3394
    if self.do_locking:
3395
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3396
      self.needed_locks[locking.LEVEL_NODE] = []
3397
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3398

    
3399
  def DeclareLocks(self, level):
3400
    if level == locking.LEVEL_NODE and self.do_locking:
3401
      self._LockInstancesNodes()
3402

    
3403
  def CheckPrereq(self):
3404
    """Check prerequisites.
3405

3406
    """
3407
    pass
3408

    
3409
  def Exec(self, feedback_fn):
3410
    """Computes the list of nodes and their attributes.
3411

3412
    """
3413
    all_info = self.cfg.GetAllInstancesInfo()
3414
    if self.wanted == locking.ALL_SET:
3415
      # caller didn't specify instance names, so ordering is not important
3416
      if self.do_locking:
3417
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3418
      else:
3419
        instance_names = all_info.keys()
3420
      instance_names = utils.NiceSort(instance_names)
3421
    else:
3422
      # caller did specify names, so we must keep the ordering
3423
      if self.do_locking:
3424
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3425
      else:
3426
        tgt_set = all_info.keys()
3427
      missing = set(self.wanted).difference(tgt_set)
3428
      if missing:
3429
        raise errors.OpExecError("Some instances were removed before"
3430
                                 " retrieving their data: %s" % missing)
3431
      instance_names = self.wanted
3432

    
3433
    instance_list = [all_info[iname] for iname in instance_names]
3434

    
3435
    # begin data gathering
3436

    
3437
    nodes = frozenset([inst.primary_node for inst in instance_list])
3438
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3439

    
3440
    bad_nodes = []
3441
    off_nodes = []
3442
    if self.do_node_query:
3443
      live_data = {}
3444
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3445
      for name in nodes:
3446
        result = node_data[name]
3447
        if result.offline:
3448
          # offline nodes will be in both lists
3449
          off_nodes.append(name)
3450
        if result.failed or result.fail_msg:
3451
          bad_nodes.append(name)
3452
        else:
3453
          if result.payload:
3454
            live_data.update(result.payload)
3455
          # else no instance is alive
3456
    else:
3457
      live_data = dict([(name, {}) for name in instance_names])
3458

    
3459
    # end data gathering
3460

    
3461
    HVPREFIX = "hv/"
3462
    BEPREFIX = "be/"
3463
    output = []
3464
    for instance in instance_list:
3465
      iout = []
3466
      i_hv = self.cfg.GetClusterInfo().FillHV(instance)
3467
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
3468
      for field in self.op.output_fields:
3469
        st_match = self._FIELDS_STATIC.Matches(field)
3470
        if field == "name":
3471
          val = instance.name
3472
        elif field == "os":
3473
          val = instance.os
3474
        elif field == "pnode":
3475
          val = instance.primary_node
3476
        elif field == "snodes":
3477
          val = list(instance.secondary_nodes)
3478
        elif field == "admin_state":
3479
          val = instance.admin_up
3480
        elif field == "oper_state":
3481
          if instance.primary_node in bad_nodes:
3482
            val = None
3483
          else:
3484
            val = bool(live_data.get(instance.name))
3485
        elif field == "status":
3486
          if instance.primary_node in off_nodes:
3487
            val = "ERROR_nodeoffline"
3488
          elif instance.primary_node in bad_nodes:
3489
            val = "ERROR_nodedown"
3490
          else:
3491
            running = bool(live_data.get(instance.name))
3492
            if running:
3493
              if instance.admin_up:
3494
                val = "running"
3495
              else:
3496
                val = "ERROR_up"
3497
            else:
3498
              if instance.admin_up:
3499
                val = "ERROR_down"
3500
              else:
3501
                val = "ADMIN_down"
3502
        elif field == "oper_ram":
3503
          if instance.primary_node in bad_nodes:
3504
            val = None
3505
          elif instance.name in live_data:
3506
            val = live_data[instance.name].get("memory", "?")
3507
          else:
3508
            val = "-"
3509
        elif field == "disk_template":
3510
          val = instance.disk_template
3511
        elif field == "ip":
3512
          if instance.nics:
3513
            val = instance.nics[0].ip
3514
          else:
3515
            val = None
3516
        elif field == "bridge":
3517
          if instance.nics:
3518
            val = instance.nics[0].bridge
3519
          else:
3520
            val = None
3521
        elif field == "mac":
3522
          if instance.nics:
3523
            val = instance.nics[0].mac
3524
          else:
3525
            val = None
3526
        elif field == "sda_size" or field == "sdb_size":
3527
          idx = ord(field[2]) - ord('a')
3528
          try:
3529
            val = instance.FindDisk(idx).size
3530
          except errors.OpPrereqError:
3531
            val = None
3532
        elif field == "disk_usage": # total disk usage per node
3533
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
3534
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3535
        elif field == "tags":
3536
          val = list(instance.GetTags())
3537
        elif field == "serial_no":
3538
          val = instance.serial_no
3539
        elif field == "network_port":
3540
          val = instance.network_port
3541
        elif field == "hypervisor":
3542
          val = instance.hypervisor
3543
        elif field == "hvparams":
3544
          val = i_hv
3545
        elif (field.startswith(HVPREFIX) and
3546
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3547
          val = i_hv.get(field[len(HVPREFIX):], None)
3548
        elif field == "beparams":
3549
          val = i_be
3550
        elif (field.startswith(BEPREFIX) and
3551
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3552
          val = i_be.get(field[len(BEPREFIX):], None)
3553
        elif st_match and st_match.groups():
3554
          # matches a variable list
3555
          st_groups = st_match.groups()
3556
          if st_groups and st_groups[0] == "disk":
3557
            if st_groups[1] == "count":
3558
              val = len(instance.disks)
3559
            elif st_groups[1] == "sizes":
3560
              val = [disk.size for disk in instance.disks]
3561
            elif st_groups[1] == "size":
3562
              try:
3563
                val = instance.FindDisk(st_groups[2]).size
3564
              except errors.OpPrereqError:
3565
                val = None
3566
            else:
3567
              assert False, "Unhandled disk parameter"
3568
          elif st_groups[0] == "nic":
3569
            if st_groups[1] == "count":
3570
              val = len(instance.nics)
3571
            elif st_groups[1] == "macs":
3572
              val = [nic.mac for nic in instance.nics]
3573
            elif st_groups[1] == "ips":
3574
              val = [nic.ip for nic in instance.nics]
3575
            elif st_groups[1] == "bridges":
3576
              val = [nic.bridge for nic in instance.nics]
3577
            else:
3578
              # index-based item
3579
              nic_idx = int(st_groups[2])
3580
              if nic_idx >= len(instance.nics):
3581
                val = None
3582
              else:
3583
                if st_groups[1] == "mac":
3584
                  val = instance.nics[nic_idx].mac
3585
                elif st_groups[1] == "ip":
3586
                  val = instance.nics[nic_idx].ip
3587
                elif st_groups[1] == "bridge":
3588
                  val = instance.nics[nic_idx].bridge
3589
                else:
3590
                  assert False, "Unhandled NIC parameter"
3591
          else:
3592
            assert False, "Unhandled variable parameter"
3593
        else:
3594
          raise errors.ParameterError(field)
3595
        iout.append(val)
3596
      output.append(iout)
3597

    
3598
    return output
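
# Illustrative sketch (not part of the original module): the parameterized
# fields declared in _FIELDS_STATIC above (for example "nic.mac/0" or
# "disk.size/1") are regular expressions whose groups drive the st_groups
# dispatch in Exec.  A standalone example of that matching, reusing the same
# pattern text with explicit anchors added for the sketch:
import re  # already imported at module level; repeated so the sketch stands alone

_SKETCH_NIC_FIELD_RE = re.compile(r"^(nic)\.(mac|ip|bridge)/([0-9]+)$")

_sketch_match = _SKETCH_NIC_FIELD_RE.match("nic.mac/1")
assert _sketch_match.groups() == ("nic", "mac", "1")  # kind, attribute, index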
3599

    
3600

    
3601
class LUFailoverInstance(LogicalUnit):
3602
  """Failover an instance.
3603

3604
  """
3605
  HPATH = "instance-failover"
3606
  HTYPE = constants.HTYPE_INSTANCE
3607
  _OP_REQP = ["instance_name", "ignore_consistency"]
3608
  REQ_BGL = False
3609

    
3610
  def ExpandNames(self):
3611
    self._ExpandAndLockInstance()
3612
    self.needed_locks[locking.LEVEL_NODE] = []
3613
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3614

    
3615
  def DeclareLocks(self, level):
3616
    if level == locking.LEVEL_NODE:
3617
      self._LockInstancesNodes()
3618

    
3619
  def BuildHooksEnv(self):
3620
    """Build hooks env.
3621

3622
    This runs on master, primary and secondary nodes of the instance.
3623

3624
    """
3625
    env = {
3626
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3627
      }
3628
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3629
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3630
    return env, nl, nl
3631

    
3632
  def CheckPrereq(self):
3633
    """Check prerequisites.
3634

3635
    This checks that the instance is in the cluster.
3636

3637
    """
3638
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3639
    assert self.instance is not None, \
3640
      "Cannot retrieve locked instance %s" % self.op.instance_name
3641

    
3642
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3643
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3644
      raise errors.OpPrereqError("Instance's disk layout is not"
3645
                                 " network mirrored, cannot failover.")
3646

    
3647
    secondary_nodes = instance.secondary_nodes
3648
    if not secondary_nodes:
3649
      raise errors.ProgrammerError("no secondary node but using "
3650
                                   "a mirrored disk template")
3651

    
3652
    target_node = secondary_nodes[0]
3653
    _CheckNodeOnline(self, target_node)
3654
    _CheckNodeNotDrained(self, target_node)
3655
    if instance.admin_up:
3656
      # check memory requirements on the secondary node
3657
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
3658
                           instance.name, bep[constants.BE_MEMORY],
3659
                           instance.hypervisor)
3660
    else:
3661
      self.LogInfo("Not checking memory on the secondary node as"
3662
                   " instance will not be started")
3663

    
3664
    # check bridge existence
3665
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3666

    
3667
  def Exec(self, feedback_fn):
3668
    """Failover an instance.
3669

3670
    The failover is done by shutting it down on its present node and
3671
    starting it on the secondary.
3672

3673
    """
3674
    instance = self.instance
3675

    
3676
    source_node = instance.primary_node
3677
    target_node = instance.secondary_nodes[0]
3678

    
3679
    feedback_fn("* checking disk consistency between source and target")
3680
    for dev in instance.disks:
3681
      # for drbd, these are drbd over lvm
3682
      if not _CheckDiskConsistency(self, dev, target_node, False):
3683
        if instance.admin_up and not self.op.ignore_consistency:
3684
          raise errors.OpExecError("Disk %s is degraded on target node,"
3685
                                   " aborting failover." % dev.iv_name)
3686

    
3687
    feedback_fn("* shutting down instance on source node")
3688
    logging.info("Shutting down instance %s on node %s",
3689
                 instance.name, source_node)
3690

    
3691
    result = self.rpc.call_instance_shutdown(source_node, instance)
3692
    msg = result.fail_msg
3693
    if msg:
3694
      if self.op.ignore_consistency:
3695
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
3696
                             " Proceeding anyway. Please make sure node"
3697
                             " %s is down. Error details: %s",
3698
                             instance.name, source_node, source_node, msg)
3699
      else:
3700
        raise errors.OpExecError("Could not shutdown instance %s on"
3701
                                 " node %s: %s" %
3702
                                 (instance.name, source_node, msg))
3703

    
3704
    feedback_fn("* deactivating the instance's disks on source node")
3705
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
3706
      raise errors.OpExecError("Can't shut down the instance's disks.")
3707

    
3708
    instance.primary_node = target_node
3709
    # distribute new instance config to the other nodes
3710
    self.cfg.Update(instance)
3711

    
3712
    # Only start the instance if it's marked as up
3713
    if instance.admin_up:
3714
      feedback_fn("* activating the instance's disks on target node")
3715
      logging.info("Starting instance %s on node %s",
3716
                   instance.name, target_node)
3717

    
3718
      disks_ok, dummy = _AssembleInstanceDisks(self, instance,
3719
                                               ignore_secondaries=True)
3720
      if not disks_ok:
3721
        _ShutdownInstanceDisks(self, instance)
3722
        raise errors.OpExecError("Can't activate the instance's disks")
3723

    
3724
      feedback_fn("* starting the instance on the target node")
3725
      result = self.rpc.call_instance_start(target_node, instance, None, None)
3726
      msg = result.fail_msg
3727
      if msg:
3728
        _ShutdownInstanceDisks(self, instance)
3729
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
3730
                                 (instance.name, target_node, msg))
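
# Illustrative sketch (not part of the original module): the failover in Exec
# above is a fixed ordering - consistency check, shutdown on the source node,
# disk deactivation, configuration update, and only then (if the instance is
# marked up) disk activation and start on the target.  Captured as data:
def _SketchFailoverSteps(admin_up):
  """Return the ordered failover steps; the last two apply only if admin_up."""
  steps = [
    "check disk consistency on the target node",
    "shut down the instance on the source node",
    "shut down the instance's disks",
    "update primary_node in the cluster configuration",
    ]
  if admin_up:
    steps.extend(["activate the disks on the target node",
                  "start the instance on the target node"])
  return steps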
3731

    
3732

    
3733
class LUMigrateInstance(LogicalUnit):
3734
  """Migrate an instance.
3735

3736
  This is migration without shutting down, compared to the failover,
3737
  which is done with shutdown.
3738

3739
  """
3740
  HPATH = "instance-migrate"
3741
  HTYPE = constants.HTYPE_INSTANCE
3742
  _OP_REQP = ["instance_name", "live", "cleanup"]
3743

    
3744
  REQ_BGL = False
3745

    
3746
  def ExpandNames(self):
3747
    self._ExpandAndLockInstance()
3748
    self.needed_locks[locking.LEVEL_NODE] = []
3749
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3750

    
3751
  def DeclareLocks(self, level):
3752
    if level == locking.LEVEL_NODE:
3753
      self._LockInstancesNodes()
3754

    
3755
  def BuildHooksEnv(self):
3756
    """Build hooks env.
3757

3758
    This runs on master, primary and secondary nodes of the instance.
3759

3760
    """
3761
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3762
    env["MIGRATE_LIVE"] = self.op.live
3763
    env["MIGRATE_CLEANUP"] = self.op.cleanup
3764
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3765
    return env, nl, nl
3766

    
3767
  def CheckPrereq(self):
3768
    """Check prerequisites.
3769

3770
    This checks that the instance is in the cluster.
3771

3772
    """
3773
    instance = self.cfg.GetInstanceInfo(
3774
      self.cfg.ExpandInstanceName(self.op.instance_name))
3775
    if instance is None:
3776
      raise errors.OpPrereqError("Instance '%s' not known" %
3777
                                 self.op.instance_name)
3778

    
3779
    if instance.disk_template != constants.DT_DRBD8:
3780
      raise errors.OpPrereqError("Instance's disk layout is not"
3781
                                 " drbd8, cannot migrate.")
3782

    
3783
    secondary_nodes = instance.secondary_nodes
3784
    if not secondary_nodes:
3785
      raise errors.ConfigurationError("No secondary node but using"
3786
                                      " drbd8 disk template")
3787

    
3788
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
3789

    
3790
    target_node = secondary_nodes[0]
3791
    # check memory requirements on the secondary node
3792
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
3793
                         instance.name, i_be[constants.BE_MEMORY],
3794
                         instance.hypervisor)
3795

    
3796
    # check bridge existence
3797
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3798

    
3799
    if not self.op.cleanup:
3800
      _CheckNodeNotDrained(self, target_node)
3801
      result = self.rpc.call_instance_migratable(instance.primary_node,
3802
                                                 instance)
3803
      result.Raise("Can't migrate, please use failover", prereq=True)
3804

    
3805
    self.instance = instance
3806

    
3807
  def _WaitUntilSync(self):
3808
    """Poll with custom rpc for disk sync.
3809

3810
    This uses our own step-based rpc call.
3811

3812
    """
3813
    self.feedback_fn("* wait until resync is done")
3814
    all_done = False
3815
    while not all_done:
3816
      all_done = True
3817
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
3818
                                            self.nodes_ip,
3819
                                            self.instance.disks)
3820
      min_percent = 100
3821
      for node, nres in result.items():
3822
        nres.Raise("Cannot resync disks on node %s" % node)
3823
        node_done, node_percent = nres.payload
3824
        all_done = all_done and node_done
3825
        if node_percent is not None:
3826
          min_percent = min(min_percent, node_percent)
3827
      if not all_done:
3828
        if min_percent < 100:
3829
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
3830
        time.sleep(2)
3831

    
3832
  def _EnsureSecondary(self, node):
3833
    """Demote a node to secondary.
3834

3835
    """
3836
    self.feedback_fn("* switching node %s to secondary mode" % node)
3837

    
3838
    for dev in self.instance.disks:
3839
      self.cfg.SetDiskID(dev, node)
3840

    
3841
    result = self.rpc.call_blockdev_close(node, self.instance.name,
3842
                                          self.instance.disks)
3843
    result.Raise("Cannot change disk to secondary on node %s" % node)
3844

    
3845
  def _GoStandalone(self):
3846
    """Disconnect from the network.
3847

3848
    """
3849
    self.feedback_fn("* changing into standalone mode")
3850
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
3851
                                               self.instance.disks)
3852
    for node, nres in result.items():
3853
      nres.Raise("Cannot disconnect disks node %s" % node)
3854

    
3855
  def _GoReconnect(self, multimaster):
3856
    """Reconnect to the network.
3857

3858
    """
3859
    if multimaster:
3860
      msg = "dual-master"
3861
    else:
3862
      msg = "single-master"
3863
    self.feedback_fn("* changing disks into %s mode" % msg)
3864
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
3865
                                           self.instance.disks,
3866
                                           self.instance.name, multimaster)
3867
    for node, nres in result.items():
3868
      nres.Raise("Cannot change disks config on node %s" % node)
3869

    
3870
  def _ExecCleanup(self):
3871
    """Try to cleanup after a failed migration.
3872

3873
    The cleanup is done by:
3874
      - check that the instance is running only on one node
3875
        (and update the config if needed)
3876
      - change disks on its secondary node to secondary
3877
      - wait until disks are fully synchronized
3878
      - disconnect from the network
3879
      - change disks into single-master mode
3880
      - wait again until disks are fully synchronized
3881

3882
    """
3883
    instance = self.instance
3884
    target_node = self.target_node
3885
    source_node = self.source_node
3886

    
3887
    # check running on only one node
3888
    self.feedback_fn("* checking where the instance actually runs"
3889
                     " (if this hangs, the hypervisor might be in"
3890
                     " a bad state)")
3891
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
3892
    for node, result in ins_l.items():
3893
      result.Raise("Can't contact node %s" % node)
3894

    
3895
    runningon_source = instance.name in ins_l[source_node].payload
3896
    runningon_target = instance.name in ins_l[target_node].payload
3897

    
3898
    if runningon_source and runningon_target:
3899
      raise errors.OpExecError("Instance seems to be running on two nodes,"
3900
                               " or the hypervisor is confused. You will have"
3901
                               " to ensure manually that it runs only on one"
3902
                               " and restart this operation.")
3903

    
3904
    if not (runningon_source or runningon_target):
3905
      raise errors.OpExecError("Instance does not seem to be running at all."
3906
                               " In this case, it's safer to repair by"
3907
                               " running 'gnt-instance stop' to ensure disk"
3908
                               " shutdown, and then restarting it.")
3909

    
3910
    if runningon_target:
3911
      # the migration has actually succeeded, we need to update the config
3912
      self.feedback_fn("* instance running on secondary node (%s),"
3913
                       " updating config" % target_node)
3914
      instance.primary_node = target_node
3915
      self.cfg.Update(instance)
3916
      demoted_node = source_node
3917
    else:
3918
      self.feedback_fn("* instance confirmed to be running on its"
3919
                       " primary node (%s)" % source_node)
3920
      demoted_node = target_node
3921

    
3922
    self._EnsureSecondary(demoted_node)
3923
    try:
3924
      self._WaitUntilSync()
3925
    except errors.OpExecError:
3926
      # we ignore errors here, since if the device is standalone, it
3927
      # won't be able to sync
3928
      pass
3929
    self._GoStandalone()
3930
    self._GoReconnect(False)
3931
    self._WaitUntilSync()
3932

    
3933
    self.feedback_fn("* done")
3934

    
3935
  def _RevertDiskStatus(self):
3936
    """Try to revert the disk status after a failed migration.
3937

3938
    """
3939
    target_node = self.target_node
3940
    try:
3941
      self._EnsureSecondary(target_node)
3942
      self._GoStandalone()
3943
      self._GoReconnect(False)
3944
      self._WaitUntilSync()
3945
    except errors.OpExecError, err:
3946
      self.LogWarning("Migration failed and I can't reconnect the"
3947
                      " drives: error '%s'\n"
3948
                      "Please look and recover the instance status" %
3949
                      str(err))
3950

    
3951
  def _AbortMigration(self):
3952
    """Call the hypervisor code to abort a started migration.
3953

3954
    """
3955
    instance = self.instance
3956
    target_node = self.target_node
3957
    migration_info = self.migration_info
3958

    
3959
    abort_result = self.rpc.call_finalize_migration(target_node,
3960
                                                    instance,
3961
                                                    migration_info,
3962
                                                    False)
3963
    abort_msg = abort_result.fail_msg
3964
    if abort_msg:
3965
      logging.error("Aborting migration failed on target node %s: %s" %
3966
                    (target_node, abort_msg))
3967
      # Don't raise an exception here, as we still have to try to revert the
3968
      # disk status, even if this step failed.
3969

    
3970
  def _ExecMigration(self):
3971
    """Migrate an instance.
3972

3973
    The migrate is done by:
3974
      - change the disks into dual-master mode
3975
      - wait until disks are fully synchronized again
3976
      - migrate the instance
3977
      - change disks on the new secondary node (the old primary) to secondary
3978
      - wait until disks are fully synchronized
3979
      - change disks into single-master mode
3980

3981
    """
3982
    instance = self.instance
3983
    target_node = self.target_node
3984
    source_node = self.source_node
3985

    
3986
    self.feedback_fn("* checking disk consistency between source and target")
3987
    for dev in instance.disks:
3988
      if not _CheckDiskConsistency(self, dev, target_node, False):
3989
        raise errors.OpExecError("Disk %s is degraded or not fully"
3990
                                 " synchronized on target node,"
3991
                                 " aborting migrate." % dev.iv_name)
3992

    
3993
    # First get the migration information from the remote node
3994
    result = self.rpc.call_migration_info(source_node, instance)
3995
    msg = result.fail_msg
3996
    if msg:
3997
      log_err = ("Failed fetching source migration information from %s: %s" %
3998
                 (source_node, msg))
3999
      logging.error(log_err)
4000
      raise errors.OpExecError(log_err)
4001

    
4002
    self.migration_info = migration_info = result.payload
4003

    
4004
    # Then switch the disks to master/master mode
4005
    self._EnsureSecondary(target_node)
4006
    self._GoStandalone()
4007
    self._GoReconnect(True)
4008
    self._WaitUntilSync()
4009

    
4010
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4011
    result = self.rpc.call_accept_instance(target_node,
4012
                                           instance,
4013
                                           migration_info,
4014
                                           self.nodes_ip[target_node])
4015

    
4016
    msg = result.fail_msg
4017
    if msg:
4018
      logging.error("Instance pre-migration failed, trying to revert"
4019
                    " disk status: %s", msg)
4020
      self._AbortMigration()
4021
      self._RevertDiskStatus()
4022
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4023
                               (instance.name, msg))
4024

    
4025
    self.feedback_fn("* migrating instance to %s" % target_node)
4026
    time.sleep(10)
4027
    result = self.rpc.call_instance_migrate(source_node, instance,
4028
                                            self.nodes_ip[target_node],
4029
                                            self.op.live)
4030
    msg = result.fail_msg
4031
    if msg:
4032
      logging.error("Instance migration failed, trying to revert"
4033
                    " disk status: %s", msg)
4034
      self._AbortMigration()
4035
      self._RevertDiskStatus()
4036
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4037
                               (instance.name, msg))
4038
    time.sleep(10)
4039

    
4040
    instance.primary_node = target_node
4041
    # distribute new instance config to the other nodes
4042
    self.cfg.Update(instance)
4043

    
4044
    result = self.rpc.call_finalize_migration(target_node,
4045
                                              instance,
4046
                                              migration_info,
4047
                                              True)
4048
    msg = result.fail_msg
4049
    if msg:
4050
      logging.error("Instance migration succeeded, but finalization failed:"
4051
                    " %s" % msg)
4052
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4053
                               msg)
4054

    
4055
    self._EnsureSecondary(source_node)
4056
    self._WaitUntilSync()
4057
    self._GoStandalone()
4058
    self._GoReconnect(False)
4059
    self._WaitUntilSync()
4060

    
4061
    self.feedback_fn("* done")
4062

    
4063
  def Exec(self, feedback_fn):
4064
    """Perform the migration.
4065

4066
    """
4067
    self.feedback_fn = feedback_fn
4068

    
4069
    self.source_node = self.instance.primary_node
4070
    self.target_node = self.instance.secondary_nodes[0]
4071
    self.all_nodes = [self.source_node, self.target_node]
4072
    self.nodes_ip = {
4073
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4074
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4075
      }
4076
    if self.op.cleanup:
4077
      return self._ExecCleanup()
4078
    else:
4079
      return self._ExecMigration()
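
# Illustrative sketch (not part of the original module): before repairing the
# DRBD state, _ExecCleanup above must decide which node really runs the
# instance.  The decision table reduces to the small helper below (pure logic
# only; the real code then demotes the other node and resyncs the disks).
def _SketchCleanupDecision(running_on_source, running_on_target):
  """Return which node keeps the instance, or raise if the state is unclear."""
  if running_on_source and running_on_target:
    raise RuntimeError("instance seems to be running on two nodes")
  if not (running_on_source or running_on_target):
    raise RuntimeError("instance does not seem to be running at all")
  if running_on_source:
    return "source"
  return "target"

# Example: a migration that actually completed leaves the instance on the target.
assert _SketchCleanupDecision(False, True) == "target"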
4080

    
4081

    
4082
def _CreateBlockDev(lu, node, instance, device, force_create,
4083
                    info, force_open):
4084
  """Create a tree of block devices on a given node.
4085

4086
  If this device type has to be created on secondaries, create it and
4087
  all its children.
4088

4089
  If not, just recurse to children keeping the same 'force' value.
4090

4091
  @param lu: the lu on whose behalf we execute
4092
  @param node: the node on which to create the device
4093
  @type instance: L{objects.Instance}
4094
  @param instance: the instance which owns the device
4095
  @type device: L{objects.Disk}
4096
  @param device: the device to create
4097
  @type force_create: boolean
4098
  @param force_create: whether to force creation of this device; this
4099
      will be changed to True whenever we find a device which has
4100
      the CreateOnSecondary() attribute
4101
  @param info: the extra 'metadata' we should attach to the device
4102
      (this will be represented as a LVM tag)
4103
  @type force_open: boolean
4104
  @param force_open: this parameter will be passed to the
4105
      L{backend.BlockdevCreate} function where it specifies
4106
      whether we run on primary or not, and it affects both
4107
      the child assembly and the device's own Open() execution
4108

4109
  """
4110
  if device.CreateOnSecondary():
4111
    force_create = True
4112

    
4113
  if device.children:
4114
    for child in device.children:
4115
      _CreateBlockDev(lu, node, instance, child, force_create,
4116
                      info, force_open)
4117

    
4118
  if not force_create:
4119
    return
4120

    
4121
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
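
# Illustrative sketch (not part of the original module): _CreateBlockDev above
# walks the disk tree depth-first and, once any device on the path wants to be
# created on secondaries, keeps force_create switched on for that device and
# everything below it.  The same traversal over a plain nested structure (the
# 'on_secondary', 'children' and 'name' keys are assumptions of this sketch):
def _SketchCollectCreated(dev, force_create, created):
  if dev.get("on_secondary"):
    force_create = True
  for child in dev.get("children", []):
    _SketchCollectCreated(child, force_create, created)
  if force_create:
    created.append(dev["name"])

# Example: a DRBD-like device that must exist on secondaries forces creation
# of both LV children before creating itself.
_sketch_created = []
_SketchCollectCreated({"name": "drbd0", "on_secondary": True,
                       "children": [{"name": "data"}, {"name": "meta"}]},
                      False, _sketch_created)
assert _sketch_created == ["data", "meta", "drbd0"]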
4122

    
4123

    
4124
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
4125
  """Create a single block device on a given node.
4126

4127
  This will not recurse over children of the device, so they must be
4128
  created in advance.
4129

4130
  @param lu: the lu on whose behalf we execute
4131
  @param node: the node on which to create the device
4132
  @type instance: L{objects.Instance}
4133
  @param instance: the instance which owns the device
4134
  @type device: L{objects.Disk}
4135
  @param device: the device to create
4136
  @param info: the extra 'metadata' we should attach to the device
4137
      (this will be represented as a LVM tag)
4138
  @type force_open: boolean
4139
  @param force_open: this parameter will be passed to the
4140
      L{backend.BlockdevCreate} function where it specifies
4141
      whether we run on primary or not, and it affects both
4142
      the child assembly and the device's own Open() execution
4143

4144
  """
4145
  lu.cfg.SetDiskID(device, node)
4146
  result = lu.rpc.call_blockdev_create(node, device, device.size,
4147
                                       instance.name, force_open, info)
4148
  result.Raise("Can't create block device %s on"
4149
               " node %s for instance %s" % (device, node, instance.name))
4150
  if device.physical_id is None:
4151
    device.physical_id = result.payload
4152

    
4153

    
4154
def _GenerateUniqueNames(lu, exts):
4155
  """Generate a suitable LV name.
4156

4157
  This will generate a logical volume name for the given instance.
4158

4159
  """
4160
  results = []
4161
  for val in exts:
4162
    new_id = lu.cfg.GenerateUniqueID()
4163
    results.append("%s%s" % (new_id, val))
4164
  return results
4165

    
4166

    
4167
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
4168
                         p_minor, s_minor):
4169
  """Generate a drbd8 device complete with its children.
4170

4171
  """
4172
  port = lu.cfg.AllocatePort()
4173
  vgname = lu.cfg.GetVGName()
4174
  shared_secret = lu.cfg.GenerateDRBDSecret()
4175
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
4176
                          logical_id=(vgname, names[0]))
4177
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
4178
                          logical_id=(vgname, names[1]))
4179
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
4180
                          logical_id=(primary, secondary, port,
4181
                                      p_minor, s_minor,
4182
                                      shared_secret),
4183
                          children=[dev_data, dev_meta],
4184
                          iv_name=iv_name)
4185
  return drbd_dev
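
# Illustrative sketch (not part of the original module): the branch built
# above is a three-node tree - a DRBD8 device whose children are the
# instance-sized data LV and a fixed 128 MiB metadata LV.  Rendered as plain
# dictionaries for clarity (the keys are a simplification, not the
# objects.Disk API):
def _SketchDrbd8Branch(size, data_lv, meta_lv):
  return {
    "type": "drbd8",
    "size": size,
    "children": [
      {"type": "lv", "name": data_lv, "size": size},
      {"type": "lv", "name": meta_lv, "size": 128},  # DRBD metadata volume
      ],
    }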
4186

    
4187

    
4188
def _GenerateDiskTemplate(lu, template_name,
4189
                          instance_name, primary_node,
4190
                          secondary_nodes, disk_info,
4191
                          file_storage_dir, file_driver,
4192
                          base_index):
4193
  """Generate the entire disk layout for a given template type.
4194

4195
  """
4196
  #TODO: compute space requirements
4197

    
4198
  vgname = lu.cfg.GetVGName()
4199
  disk_count = len(disk_info)
4200
  disks = []
4201
  if template_name == constants.DT_DISKLESS:
4202
    pass
4203
  elif template_name == constants.DT_PLAIN:
4204
    if len(secondary_nodes) != 0:
4205
      raise errors.ProgrammerError("Wrong template configuration")
4206

    
4207
    names = _GenerateUniqueNames(lu, [".disk%d" % i
4208
                                      for i in range(disk_count)])
4209
    for idx, disk in enumerate(disk_info):
4210
      disk_index = idx + base_index
4211
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
4212
                              logical_id=(vgname, names[idx]),
4213
                              iv_name="disk/%d" % disk_index,
4214
                              mode=disk["mode"])
4215
      disks.append(disk_dev)
4216
  elif template_name == constants.DT_DRBD8:
4217
    if len(secondary_nodes) != 1:
4218
      raise errors.ProgrammerError("Wrong template configuration")
4219
    remote_node = secondary_nodes[0]
4220
    minors = lu.cfg.AllocateDRBDMinor(
4221
      [primary_node, remote_node] * len(disk_info), instance_name)
4222

    
4223
    names = []
4224
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
4225
                                               for i in range(disk_count)]):
4226
      names.append(lv_prefix + "_data")
4227
      names.append(lv_prefix + "_meta")
4228
    for idx, disk in enumerate(disk_info):
4229
      disk_index = idx + base_index
4230
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
4231
                                      disk["size"], names[idx*2:idx*2+2],
4232
                                      "disk/%d" % disk_index,
4233
                                      minors[idx*2], minors[idx*2+1])
4234
      disk_dev.mode = disk["mode"]
4235
      disks.append(disk_dev)
4236
  elif template_name == constants.DT_FILE:
4237
    if len(secondary_nodes) != 0:
4238
      raise errors.ProgrammerError("Wrong template configuration")
4239

    
4240
    for idx, disk in enumerate(disk_info):
4241
      disk_index = idx + base_index
4242
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
4243
                              iv_name="disk/%d" % disk_index,
4244
                              logical_id=(file_driver,
4245
                                          "%s/disk%d" % (file_storage_dir,
4246
                                                         disk_index)),
4247
                              mode=disk["mode"])
4248
      disks.append(disk_dev)
4249
  else:
4250
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
4251
  return disks
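
# Illustrative sketch (not part of the original module): for the DRBD8
# template above every instance disk consumes one "_data"/"_meta" LV pair and
# two DRBD minors (one per node), which is why the loop indexes both lists
# with idx*2 and idx*2+1.  A standalone rendering of that pairing:
def _SketchDrbdLayout(lv_prefixes, minors):
  """lv_prefixes: one prefix per disk; minors: flat [primary, secondary, ...]."""
  layout = []
  for idx, prefix in enumerate(lv_prefixes):
    layout.append({
      "data_lv": prefix + "_data",
      "meta_lv": prefix + "_meta",
      "p_minor": minors[idx * 2],
      "s_minor": minors[idx * 2 + 1],
      })
  return layout

# Example: two disks need four minors, consumed pair-wise.
assert _SketchDrbdLayout(["id0.disk0", "id1.disk1"], [0, 0, 1, 1]) == [
  {"data_lv": "id0.disk0_data", "meta_lv": "id0.disk0_meta",
   "p_minor": 0, "s_minor": 0},
  {"data_lv": "id1.disk1_data", "meta_lv": "id1.disk1_meta",
   "p_minor": 1, "s_minor": 1},
  ]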
4252

    
4253

    
4254
def _GetInstanceInfoText(instance):
4255
  """Compute the text that should be added to the disk's metadata.
4256

4257
  """
4258
  return "originstname+%s" % instance.name
4259

    
4260

    
4261
def _CreateDisks(lu, instance):
4262
  """Create all disks for an instance.
4263

4264
  This abstracts away some work from AddInstance.
4265

4266
  @type lu: L{LogicalUnit}
4267
  @param lu: the logical unit on whose behalf we execute
4268
  @type instance: L{objects.Instance}
4269
  @param instance: the instance whose disks we should create
4270
  @rtype: boolean
4271
  @return: the success of the creation
4272

4273
  """
4274
  info = _GetInstanceInfoText(instance)
4275
  pnode = instance.primary_node
4276

    
4277
  if instance.disk_template == constants.DT_FILE:
4278
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4279
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
4280

    
4281
    result.Raise("Failed to create directory '%s' on"
4282
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
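      # f_create is True only on the primary node; it is passed both as
      # the force_create and the force_open argument of _CreateBlockDev,
      # which decides what actually has to be created on the secondaries.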


def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    msg = result.fail_msg
    if msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
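  # Illustrative sizing (example values): for two disks of 1024 MB and
  # 512 MB, DT_PLAIN needs 1024 + 512 = 1536 MB of free space in the
  # volume group, while DT_DRBD8 needs (1024 + 128) + (512 + 128) =
  # 1792 MB because of the per-disk DRBD metadata; DT_DISKLESS and
  # DT_FILE return None (no volume group space required).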


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge
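      # (a plain 'bridge' argument is simply folded into 'link' above;
      # presumably kept so that callers which still pass 'bridge' keep
      # working)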

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)
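    # after the run, ial.success and ial.info describe the outcome, and
    # ial.nodes holds the proposed node names (checked against
    # ial.required_nodes below)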

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx + 1:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))
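      # (req_size from _ComputeDiskSize and the reported vg_free are both
      # expressed in megabytes, as the error message above states)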

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))
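    # illustrative result (default paths assumed): something like
    # /srv/ganeti/file-storage/<optional subdir>/<instance name>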


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)
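    # (the trailing 0 is the base disk index, cf. base_index in
    # _GenerateDiskTemplate: the new disks get iv_names starting at disk/0)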

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
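    # (illustrative only: BuildCmd assembles the ssh invocation to run on
    # the master, roughly ["ssh", <options...>, "root@<primary node>",
    # <console_cmd>]; the exact options come from ssh.SshRunner)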


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    # check for valid parameter combination
    cnt = [self.op.remote_node, self.op.iallocator].count(None)
    if self.op.mode == constants.REPLACE_DISK_CHG:
      if cnt == 2:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")
      elif cnt == 0:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
    else: # not replacing the secondary
      if cnt != 2:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " be used only when changing the"

    
5044
  def ExpandNames(self):
5045
    self._ExpandAndLockInstance()
5046

    
5047
    if self.op.iallocator is not None:
5048
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5049
    elif self.op.remote_node is not None:
5050
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5051
      if remote_node is None:
5052
        raise errors.OpPrereqError("Node '%s' not known" %
5053
                                   self.op.remote_node)
5054
      self.op.remote_node = remote_node
5055
      # Warning: do not remove the locking of the new secondary here
5056
      # unless DRBD8.AddChildren is changed to work in parallel;
5057
      # currently it doesn't since parallel invocations of
5058
      # FindUnusedMinor will conflict
5059
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5060
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5061
    else:
5062
      self.needed_locks[locking.LEVEL_NODE] = []
5063
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5064

    
5065
  def DeclareLocks(self, level):
5066
    # If we're not already locking all nodes in the set we have to declare the
5067
    # instance's primary/secondary nodes.
5068
    if (level == locking.LEVEL_NODE and
5069
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5070
      self._LockInstancesNodes()
5071

    
5072
  def _RunAllocator(self):
5073
    """Compute a new secondary node using an IAllocator.
5074

5075
    """
5076
    ial = IAllocator(self,
5077
                     mode=constants.IALLOCATOR_MODE_RELOC,
5078
                     name=self.op.instance_name,
5079
                     relocate_from=[self.sec_node])
5080

    
5081
    ial.Run(self.op.iallocator)
5082

    
5083
    if not ial.success:
5084
      raise errors.OpPrereqError("Can't compute nodes using"
5085
                                 " iallocator '%s': %s" % (self.op.iallocator,
5086
                                                           ial.info))
5087
    if len(ial.nodes) != ial.required_nodes:
5088
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5089
                                 " of nodes (%s), required %s" %
5090
                                 (len(ial.nodes), ial.required_nodes))
    self.op.remote_node = ial.nodes[0]
    self.LogInfo("Selected new secondary for the instance: %s",
                 self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    self.instance = instance

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes))

    self.sec_node = instance.secondary_nodes[0]

    if self.op.iallocator is not None:
      self._RunAllocator()

    remote_node = self.op.remote_node
    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None
    if remote_node == instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")
    elif remote_node == self.sec_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.op.mode == constants.REPLACE_DISK_PRI:
      n1 = self.tgt_node = instance.primary_node
      n2 = self.oth_node = self.sec_node
    elif self.op.mode == constants.REPLACE_DISK_SEC:
      n1 = self.tgt_node = self.sec_node
      n2 = self.oth_node = instance.primary_node
    elif self.op.mode == constants.REPLACE_DISK_CHG:
      n1 = self.new_node = remote_node
      n2 = self.oth_node = instance.primary_node
      self.tgt_node = self.sec_node
      _CheckNodeNotDrained(self, remote_node)
    else:
      raise errors.ProgrammerError("Unhandled disk replace mode")

    _CheckNodeOnline(self, n1)
    _CheckNodeOnline(self, n2)

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))

    for disk_idx in self.op.disks:
      instance.FindDisk(disk_idx)

  def _ExecD8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for dbrd8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    iv_names = {}
    vgname = self.cfg.GetVGName()
    # start of work
    cfg = self.cfg
    tgt_node = self.tgt_node
    oth_node = self.oth_node

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = self.rpc.call_vg_list([oth_node, tgt_node])
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")
    for node in oth_node, tgt_node:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if my_vg not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      for node in tgt_node, oth_node:
        info("checking disk/%d on %s" % (idx, node))
        cfg.SetDiskID(dev, node)
        result = self.rpc.call_blockdev_find(node, dev)
        msg = result.fail_msg
        if not msg and not result.payload:
          msg = "disk not found"
        if msg:
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d consistency on %s" % (idx, oth_node))
      if not _CheckDiskConsistency(self, dev, oth_node,
                                   oth_node==instance.primary_node):
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
                                 " to replace disks on this node (%s)" %
                                 (oth_node, tgt_node))

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      size = dev.size
      cfg.SetDiskID(dev, tgt_node)
      lv_names = [".disk%d_%s" % (idx, suf)
                  for suf in ["data", "meta"]]
      names = _GenerateUniqueNames(self, lv_names)
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))
      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
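      # iv_names maps the disk's iv_name to (drbd device, old LVs, new
      # LVs); it is used below when reconfiguring drbd and when removing
      # the old storage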
      info("creating new local storage on %s for %s" %
           (tgt_node, dev.iv_name))
      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
                        _GetInstanceInfoText(instance), False)

    # Step: for each lv, detach+rename*2+attach
    self.proc.LogStep(4, steps_total, "change drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      info("detaching %s drbd from local storage" % dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (tgt_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)
      # build the rename list based on what LVs exist on the node
      rlist = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))

      info("renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
      result.Raise("Can't rename old LVs on node %s" % tgt_node)
      # now we rename the new LVs to the old LVs
      info("renaming the new LVs on the target node")
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
      result.Raise("Can't rename new LVs on node %s" % tgt_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        cfg.SetDiskID(new, tgt_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        cfg.SetDiskID(disk, tgt_node)

      # now that the new lvs have the old name, we can add them to the device
      info("adding new mirror component on %s" % tgt_node)
      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(tgt_node, new_lv).fail_msg
          if msg2:
            warning("Can't rollback device %s: %s", dev, msg2,
                    hint="cleanup manually the unused logical volumes")
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs
      cfg.Update(instance)

    # Step: wait for sync

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(self, instance, unlock=True)

    # so check manually all the devices
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      cfg.SetDiskID(dev, instance.primary_node)
      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
      msg = result.fail_msg
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))
      if result.payload[5]:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

    # Step: remove old storage
    self.proc.LogStep(6, steps_total, "removing old storage")
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      info("remove logical volumes for %s" % name)
      for lv in old_lvs:
        cfg.SetDiskID(lv, tgt_node)
        msg = self.rpc.call_blockdev_remove(tgt_node, lv).fail_msg
        if msg:
          warning("Can't remove old LV: %s" % msg,
                  hint="manually remove unused LVs")
          continue

  def _ExecD8Secondary(self, feedback_fn):
    """Replace the secondary node for drbd8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    iv_names = {}
    # start of work
    cfg = self.cfg
    old_node = self.tgt_node
    new_node = self.new_node
    pri_node = instance.primary_node
    nodes_ip = {
      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
      }

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = self.rpc.call_vg_list([pri_node, new_node])
    for node in pri_node, new_node:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if my_vg not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d on %s" % (idx, pri_node))
      cfg.SetDiskID(dev, pri_node)
      result = self.rpc.call_blockdev_find(pri_node, dev)
      msg = result.fail_msg
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                 (idx, pri_node, msg))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d consistency on %s" % (idx, pri_node))
      if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
                                 " unsafe to replace the secondary" %
                                 pri_node)

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for idx, dev in enumerate(instance.disks):
      info("adding new local storage on %s for disk/%d" %
           (new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self, new_node, instance, new_lv, True,
                        _GetInstanceInfoText(instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks],
                                   instance.name)
    logging.debug("Allocated minors %s" % (minors,))
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
      size = dev.size
      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if pri_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
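      # iv_names maps the disk index to (disk object, its local LVs, the
      # network-enabled logical_id); it is used below to update the
      # configuration and to clean up the old storage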
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
                              _GetInstanceInfoText(instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    for idx, dev in enumerate(instance.disks):
      # we have new devices, shutdown the drbd on the old secondary
      info("shutting down drbd for disk/%d on old node" % idx)
      cfg.SetDiskID(dev, old_node)
      msg = self.rpc.call_blockdev_shutdown(old_node, dev).fail_msg
      if msg:
        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
                (idx, msg),
                hint="Please cleanup this device manually as soon as possible")

    info("detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
                                               instance.disks)[pri_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    info("updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      cfg.SetDiskID(dev, pri_node)
    cfg.Update(instance)

    # and now perform the drbd attach
    info("attaching primary drbds to new secondary (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
                                           instance.disks, instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        warning("can't attach drbd disks on node %s: %s", to_node, msg,
                hint="please do a gnt-instance info to see the"
                " status of disks")

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(self, instance, unlock=True)

    # so check manually all the devices
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
      cfg.SetDiskID(dev, pri_node)
      result = self.rpc.call_blockdev_find(pri_node, dev)
      msg = result.fail_msg
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
                                 (idx, msg))
      if result.payload[5]:
        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)

    self.proc.LogStep(6, steps_total, "removing old storage")
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
      info("remove logical volumes for disk/%d" % idx)
      for lv in old_lvs:
        cfg.SetDiskID(lv, old_node)
        msg = self.rpc.call_blockdev_remove(old_node, lv).fail_msg
        if msg:
          warning("Can't remove LV on old secondary: %s", msg,
                  hint="Cleanup stale volumes by hand")

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    instance = self.instance

    # Activate the instance disks if we're replacing them on a down instance
    if not instance.admin_up:
      _StartInstanceDisks(self, instance, True)

    if self.op.mode == constants.REPLACE_DISK_CHG:
      fn = self._ExecD8Secondary
    else:
      fn = self._ExecD8DiskOnly

    ret = fn(feedback_fn)

    # Deactivate the instance disks if we're replacing them on a down instance
    if not instance.admin_up:
      _SafeShutdownInstanceDisks(self, instance)

    return ret


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)


    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node)
      vg_free = info.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
5641
        raise errors.OpPrereqError("Can't compute free disk space on"
5642
                                   " node %s" % node)
5643
      if self.op.amount > vg_free:
5644
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
5645
                                   " %d MiB available, %d MiB required" %
5646
                                   (node, vg_free, self.op.amount))
5647

    
5648
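  # Illustrative flow, not an exhaustive spec: for a 512 MiB grow request on a
  # DRBD instance, CheckPrereq above demands at least 512 MiB of vg_free on
  # every node of the instance, and Exec below issues blockdev_grow on each
  # node before recording the new size in the cluster configuration.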
  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    static = self.op.static
    if not static:
      self.cfg.SetDiskID(dev, instance.primary_node)
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
      if dev_pstatus.offline:
        dev_pstatus = None
      else:
        dev_pstatus.Raise("Can't compute disk status for %s" % instance.name)
        dev_pstatus = dev_pstatus.payload
    else:
      dev_pstatus = None

    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    if snode and not static:
      self.cfg.SetDiskID(dev, snode)
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
      if dev_sstatus.offline:
        dev_sstatus = None
      else:
        dev_sstatus.Raise("Can't compute disk status for %s" % instance.name)
        dev_sstatus = dev_sstatus.payload
    else:
      dev_sstatus = None

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      }

    return data

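  # _ComputeDiskStatus returns a nested dict; for a DRBD8 disk it looks
  # roughly like (values illustrative):
  #   {"iv_name": "disk/0", "dev_type": constants.LD_DRBD8, ...,
  #    "children": [<LV status dict>, <LV status dict>]}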
  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instances's parameters.
5817

5818
  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or
            self.op.hvparams or self.op.beparams):
      raise errors.OpPrereqError("No changes submitted")

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index")
      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing")
        try:
          size = int(size)
        except ValueError, err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err))
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk")

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time")

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index")

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          if not utils.IsValidMac(nic_mac):
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic")

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time")

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def _GetUpdatedParams(self, old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                       or constants.VALUE_DEFAULT to reset the
                       parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)

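  # A minimal illustration of _GetUpdatedParams (keys made up, type
  # enforcement skipped): with old_params={'a': 1},
  # update_dict={'a': constants.VALUE_DEFAULT, 'b': 2} and
  # default_values={'a': 10, 'b': 0} it returns ({'b': 2}, {'a': 10, 'b': 2}):
  # VALUE_DEFAULT drops the per-instance override and the filled dict is the
  # defaults overlaid with whatever remains.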
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    force = self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem)

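      # Worked example for the check above: raising BE_MEMORY to 2048 MB while
      # the instance currently uses 512 MB and the primary node reports
      # 1024 MB free gives miss_mem = 2048 - 512 - 1024 = 512 > 0, so the
      # change is refused.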
      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics)))
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic')
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None')
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC()
        else:
          # or validate/reserve the current one
          if self.cfg.IsMacInUse(nic_mac):
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances")
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance")
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg))
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)))

    return

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    cluster = self.cluster
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pnew:
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance)

    return result


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

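  # Rough outline of the export below: optionally shut the instance down,
  # snapshot every disk on the primary node, restart the instance if it was
  # running, copy each snapshot to the target node, drop the snapshots,
  # finalize the export on the target node and finally remove any older
  # export of the same instance from the remaining nodes.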
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    try:
      for idx, disk in enumerate(instance.disks):
        # result.payload will be a snapshot of an lvm leaf of the one we passed
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                          idx, src_node, msg)
          snap_disks.append(False)
        else:
          disk_id = (vgname, result.payload)
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=disk_id, physical_id=disk_id,
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      if self.op.shutdown and instance.admin_up:
        result = self.rpc.call_instance_start(src_node, instance, None, None)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)

    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, lu, mode, name, **kwargs):
    self.lu = lu
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

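  # Sketch of how a LU typically drives this class in allocation mode; all
  # keyword values below are illustrative, not a definitive call:
  #   ial = IAllocator(self, constants.IALLOCATOR_MODE_ALLOC,
  #                    "inst1.example.com",
  #                    mem_size=512, disks=[{"size": 1024}],
  #                    disk_template=constants.DT_DRBD8, os="debian-etch",
  #                    tags=[], nics=[{"mac": "auto", "ip": None,
  #                                    "bridge": None}],
  #                    vcpus=1, hypervisor=constants.HT_XEN_PVM)
  # after which the chosen allocator script is run and its result parsed.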
  def _ComputeClusterData(self):
6828
    """Compute the generic allocator input data.
6829

6830
    This is the data that is independent of the actual operation.
6831

6832
    """
6833
    cfg = self.lu.cfg
6834
    cluster_info = cfg.GetClusterInfo()
6835
    # cluster data
6836
    data = {
6837
      "version": constants.IALLOCATOR_VERSION,
6838
      "cluster_name": cfg.GetClusterName(),
6839
      "cluster_tags": list(cluster_info.GetTags()),
6840
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
6841
      # we don't have job IDs
6842
      }
6843
    iinfo = cfg.GetAllInstancesInfo().values()
6844
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
6845

    
6846
    # node data
6847
    node_results = {}
6848
    node_list = cfg.GetNodeList()
6849

    
6850
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6851
      hypervisor_name = self.hypervisor
6852
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
6853
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
6854

    
6855
    node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
6856
                                           hypervisor_name)
6857
    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
6858
                       cluster_info.enabled_hypervisors)
6859
    for nname, nresult in node_data.items():
6860
      # first fill in static (config-based) values
6861
      ninfo = cfg.GetNodeInfo(nname)
6862
      pnr = {
6863
        "tags": list(ninfo.GetTags()),
6864
        "primary_ip": ninfo.primary_ip,
6865
        "secondary_ip": ninfo.secondary_ip,
6866
        "offline": ninfo.offline,
6867
        "drained": ninfo.drained,
6868
        "master_candidate": ninfo.master_candidate,
6869
        }
6870

    
6871
      if not ninfo.offline:
6872
        nresult.Raise("Can't get data for node %s" % nname)
6873
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
6874
                                nname)
6875
        remote_info = nresult.payload
6876
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
6877
                     'vg_size', 'vg_free', 'cpu_total']:
6878
          if attr not in remote_info:
6879
            raise errors.OpExecError("Node '%s' didn't return attribute"
6880
                                     " '%s'" % (nname, attr))
6881
          if not isinstance(remote_info[attr], int):
6882
            raise errors.OpExecError("Node '%s' returned invalid value"
6883
                                     " for '%s': %s" %
6884
                                     (nname, attr, remote_info[attr]))
6885
        # compute memory used by primary instances
6886
        i_p_mem = i_p_up_mem = 0
6887
        for iinfo, beinfo in i_list:
6888
          if iinfo.primary_node == nname:
6889
            i_p_mem += beinfo[constants.BE_MEMORY]
6890
            if iinfo.name not in node_iinfo[nname].payload:
6891
              i_used_mem = 0
6892
            else:
6893
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
6894
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
6895
            remote_info['memory_free'] -= max(0, i_mem_diff)
6896

    
6897
            if iinfo.admin_up:
6898
              i_p_up_mem += beinfo[constants.BE_MEMORY]
6899

    
6900
        # compute memory used by instances
6901
        pnr_dyn = {
6902
          "total_memory": remote_info['memory_total'],
6903
          "reserved_memory": remote_info['memory_dom0'],
6904
          "free_memory": remote_info['memory_free'],
6905
          "total_disk": remote_info['vg_size'],
6906
          "free_disk": remote_info['vg_free'],
6907
          "total_cpus": remote_info['cpu_total'],
6908
          "i_pri_memory": i_p_mem,
6909
          "i_pri_up_memory": i_p_up_mem,
6910
          }
6911
        pnr.update(pnr_dyn)
6912

    
6913
      node_results[nname] = pnr
6914
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data
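
    # Illustrative sketch of one data["instances"] entry as built above.  The
    # key set mirrors the 'pir' dict; all values are invented examples:
    #   "instance1.example.com": {
    #     "tags": [], "admin_up": True, "vcpus": 1, "memory": 512,
    #     "os": "debootstrap",
    #     "nodes": ["node1.example.com", "node2.example.com"],
    #     "nics": [{"mac": "aa:00:00:00:00:01", "ip": None,
    #               "mode": "bridged", "link": "xen-br0",
    #               "bridge": "xen-br0"}],
    #     "disks": [{"size": 1024, "mode": "w"}],
    #     "disk_template": "drbd", "hypervisor": "xen-pvm",
    #     "disk_space_total": ...,  # computed by _ComputeDiskSize
    #   }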

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request
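
    # Illustrative "allocate" request as assembled above (example values,
    # mirroring the attributes set on this IAllocator instance):
    #   {"type": "allocate", "name": "instance1.example.com",
    #    "disk_template": "drbd", "tags": [], "os": "debootstrap",
    #    "vcpus": 1, "memory": 512,
    #    "disks": [{"size": 1024, "mode": "w"}],
    #    "disk_space_total": ...,  # from _ComputeDiskSize
    #    "nics": [...],            # as passed in by the caller
    #    "required_nodes": 2}      # 2 for mirrored templates, else 1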

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.lu.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request
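
    # Illustrative "relocate" request as assembled above (example values):
    #   {"type": "relocate", "name": "instance1.example.com",
    #    "disk_space_total": ...,  # from _ComputeDiskSize
    #    "required_nodes": 1,
    #    "relocate_from": ["node2.example.com"]}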

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)
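
    # self.in_text now holds the serialized allocator input; it contains at
    # least the "nodes", "instances" and "request" sections built above, on
    # top of the cluster-level data gathered earlier in _ComputeClusterData.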

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.lu.rpc.call_iallocator_runner

    result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict
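
    # At this point self.out_data holds the parsed reply.  An illustrative
    # well-formed reply (example values only) would look like:
    #   {"success": True,
    #    "info": "allocation successful",
    #    "nodes": ["node1.example.com", "node2.example.com"]}
    # i.e. the three keys checked above, with "nodes" being a list.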


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]
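
  # The 'direction' parameter selects what Exec returns: with
  # constants.IALLOCATOR_DIR_IN the LU only builds and returns the allocator
  # input text, while with constants.IALLOCATOR_DIR_OUT it also runs the
  # named allocator script and returns its raw output (see Exec below).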

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
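

# Usage note (illustrative only; the opcode class name below is assumed,
# check opcodes.py): LUTestAllocator is normally driven through a debug
# opcode rather than instantiated directly, e.g. something along the lines
# of:
#
#   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                                mode=constants.IALLOCATOR_MODE_RELOC,
#                                name="instance1.example.com")
#
# With IALLOCATOR_DIR_IN the LU returns the JSON input that would be fed to
# the allocator script; with IALLOCATOR_DIR_OUT it additionally needs the
# 'allocator' attribute (the script name) and returns the script's output.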