#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import tempfile
import re
import platform
import logging
import copy
import random

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import opcodes
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict(((i, 0) for i in locking.LEVELS))
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)
    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this will
    be handled by the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes for a phase, an empty list (and not None)
    should be returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


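# Note: a brief, illustrative summary of how mcpu drives the methods above
# (see their docstrings): ExpandNames() runs first, DeclareLocks(level) is
# called per locking level just before acquiring those locks, CheckPrereq()
# runs once locks are held, hooks built from BuildHooksEnv() run before and
# after Exec(feedback_fn), and HooksCallBack() is notified about each hooks
# phase. LUs with REQ_BGL = True simply run under the Big Ganeti Lock.
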
class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks):
  """Builds instance-related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  return env

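# For illustration only: for an instance with a single bridged NIC and one
# disk, _BuildInstanceHookEnv above returns keys such as OP_TARGET,
# INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES, INSTANCE_OS_TYPE,
# INSTANCE_STATUS, INSTANCE_MEMORY, INSTANCE_VCPUS, INSTANCE_DISK_TEMPLATE,
# INSTANCE_NIC_COUNT, INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE,
# INSTANCE_NIC0_LINK, INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT,
# INSTANCE_DISK0_SIZE and INSTANCE_DISK0_MODE; the 'GANETI_' prefix is added
# later by the hooks runner.
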
def _PreBuildNICHooksList(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _PreBuildNICHooksList(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise()
    if not result.data:
      raise errors.OpPrereqError("One or more target bridges %s does not"
                                 " exist on destination node '%s'" %
                                 (brlist, target_node))


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise()
    if not result.data:
      raise errors.OpExecError("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in the
        form of minor: (instance, must_exist), which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: non master-candidate has old/wrong file"
                        " '%s'" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to, should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are just run in the post phase, and their failure
    makes the output be logged in the verify output and the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    for node_i in nodeinfo:
      node = node_i.name
      nresult = all_nvinfo[node].data

      if node_i.offline:
        feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      if all_nvinfo[node].failed or not isinstance(nresult, dict):
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
        bad = True
        continue

      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        if instance not in instanceinfo:
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
                      instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      result = self._VerifyNode(node_i, file_names, local_checksums,
                                nresult, feedback_fn, master_files,
                                node_drbd, vg_name)
      bad = bad or result

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, utils.SafeEncode(lvdata)))
        bad = True
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      if not isinstance(idata, list):
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
                    (node,))
        bad = True
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          if (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST]):
            feedback_fn("  - ERROR: node %s didn't return data for the"
                        " volume group '%s' - it is either missing or broken" %
                        (node, vg_name))
            bad = True
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
                    " from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      result = self._VerifyInstance(instance, inst_config, node_volume,
                                    node_instance, feedback_fn, n_offline)
      bad = bad or result
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      elif pnode not in n_offline:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        elif snode not in n_offline:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))
          bad = True
        if snode in n_offline:
          inst_nodes_offline.append(snode)

      if inst_nodes_offline:
        # warn that the instance lives on offline nodes, and set bad=True
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
                    ", ".join(inst_nodes_offline))
        bad = True

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          if res.failed or res.data is False or not isinstance(res.data, list):
            if res.offline:
              # no need to warn or set fail return value
              continue
            feedback_fn("    Communication failure in hooks execution")
            lu_result = 1
            continue
          for script, hkr, output in res.data:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    """
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_volume_list(nodes, vg_name)

    to_act = set()
    for node in nodes:
      # node_volume
      lvs = node_lvs[node]
      if lvs.failed:
        if not lvs.offline:
          self.LogWarning("Connection to node %s failed: %s" %
                          (node, lvs.data))
        continue
      lvs = lvs.data
      if isinstance(lvs, basestring):
        logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
        res_nlvm[node] = lvs
        continue
      elif not isinstance(lvs, dict):
        logging.warning("Connection to node %s failed or invalid data"
                        " returned", node)
        res_nodes.append(node)
        continue

      for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURenameCluster(LogicalUnit):
1354
  """Rename the cluster.
1355

1356
  """
1357
  HPATH = "cluster-rename"
1358
  HTYPE = constants.HTYPE_CLUSTER
1359
  _OP_REQP = ["name"]
1360

    
1361
  def BuildHooksEnv(self):
1362
    """Build hooks env.
1363

1364
    """
1365
    env = {
1366
      "OP_TARGET": self.cfg.GetClusterName(),
1367
      "NEW_NAME": self.op.name,
1368
      }
1369
    mn = self.cfg.GetMasterNode()
1370
    return env, [mn], [mn]
1371

    
1372
  def CheckPrereq(self):
1373
    """Verify that the passed name is a valid one.
1374

1375
    """
1376
    hostname = utils.HostInfo(self.op.name)
1377

    
1378
    new_name = hostname.name
1379
    self.ip = new_ip = hostname.ip
1380
    old_name = self.cfg.GetClusterName()
1381
    old_ip = self.cfg.GetMasterIP()
1382
    if new_name == old_name and new_ip == old_ip:
1383
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1384
                                 " cluster has changed")
1385
    if new_ip != old_ip:
1386
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1387
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1388
                                   " reachable on the network. Aborting." %
1389
                                   new_ip)
1390

    
1391
    self.op.name = new_name
1392

    
1393
  def Exec(self, feedback_fn):
1394
    """Rename the cluster.
1395

1396
    """
1397
    clustername = self.op.name
1398
    ip = self.ip
1399

    
1400
    # shutdown the master IP
1401
    master = self.cfg.GetMasterNode()
1402
    result = self.rpc.call_node_stop_master(master, False)
1403
    if result.failed or not result.data:
1404
      raise errors.OpExecError("Could not disable the master role")
1405

    
1406
    try:
1407
      cluster = self.cfg.GetClusterInfo()
1408
      cluster.cluster_name = clustername
1409
      cluster.master_ip = ip
1410
      self.cfg.Update(cluster)
1411

    
1412
      # update the known hosts file
1413
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1414
      node_list = self.cfg.GetNodeList()
1415
      try:
1416
        node_list.remove(master)
1417
      except ValueError:
1418
        pass
1419
      result = self.rpc.call_upload_file(node_list,
1420
                                         constants.SSH_KNOWN_HOSTS_FILE)
1421
      for to_node, to_result in result.iteritems():
1422
         msg = to_result.RemoteFailMsg()
1423
         if msg:
1424
           msg = ("Copy of file %s to node %s failed: %s" %
1425
                   (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1426
           self.proc.LogWarning(msg)
1427

    
1428
    finally:
1429
      result = self.rpc.call_node_start_master(master, False)
1430
      if result.failed or not result.data:
1431
        self.LogWarning("Could not re-enable the master role on"
1432
                        " the master, please restart manually.")
1433

    
1434

    
1435
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
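
# Illustrative sketch for _RecursiveCheckIfLVMBased (the disk objects below
# are hypothetical, built only to show the recursion): a DRBD disk backed by
# LVM logical volumes is reported as LVM-based because one of its children
# carries the LD_LV dev_type.
#
#   lv = objects.Disk(dev_type=constants.LD_LV, size=1024, children=[])
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=1024,
#                       children=[lv, lv])
#   _RecursiveCheckIfLVMBased(drbd)  # True, an LD_LV child was found

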
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err))
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed")

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist")

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        if vglist[node].failed:
          # ignoring down node
          self.LogWarning("Node %s unreachable/error, ignoring" % node)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus))

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size

    self.cfg.Update(self.cluster)

    # we want to update nodes after the cluster so that if any errors
    # happen, we have recorded and saved the cluster info
    if self.op.candidate_pool_size is not None:
      _AdjustCandidatePool(self)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.RemoteFailMsg()
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo())
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False, unlock=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  retries = 0
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    if rstats.failed or not rstats.data:
      lu.LogWarning("Can't get any data from node %s", node)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.data
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue
      # we ignore the ldisk parameter
      perc_done, est_time, is_degraded, _ = mstat
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
      if perc_done is not None:
        done = False
        if est_time is not None:
          rem_time = "%d estimated seconds remaining" % est_time
          max_time = est_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, perc_done, rem_time))
    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
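
# For reference, _WaitForSync above unpacks each mirror status entry as
# (percent_done, estimated_time, is_degraded, ldisk); percent_done becomes
# None once a disk has finished syncing. A hypothetical entry for a disk that
# is still resyncing could look like:
#
#   mstat = (80.25, 120, True, False)
#   perc_done, est_time, is_degraded, _ = mstat

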
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)
  if ldisk:
    idx = 6
  else:
    idx = 5

  result = True
  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.RemoteFailMsg()
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      result = result and (not rstats.payload[idx])
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
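
# Hypothetical callers of _CheckDiskConsistency above: passing ldisk=True
# switches the test from the overall is_degraded flag (payload index 5) to
# the local-storage status (payload index 6).
#
#   overall_ok = _CheckDiskConsistency(lu, dev, node, on_primary=True)
#   local_ok = _CheckDiskConsistency(lu, dev, node, on_primary=True,
#                                    ldisk=True)

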
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param node_list: a list with the names of all nodes
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and list of OS objects as values, eg::

          {"debian-etch": {"node1": [<object>,...],
                           "node2": [<object>,]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].failed]
    for node_name, nr in rlist.iteritems():
      if nr.failed or not nr.data:
        continue
      for os_obj in nr.data:
        if os_obj.name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[os_obj.name] = {}
          for nname in good_nodes:
            all_os[os_obj.name][nname] = []
        all_os[os_obj.name][node_name].append(os_obj)
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    if node_data == False:
      raise errors.OpExecError("Can't gather the list of OSes")
    pol = self._DiagnoseByOS(valid_nodes, node_data)
    output = []
    for os_name, os_data in pol.iteritems():
      row = []
      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = utils.all([osl and osl[0] for osl in os_data.values()])
        elif field == "node_status":
          val = {}
          for node_name, nos_list in os_data.iteritems():
            val[node_name] = [(v.status, v.path) for v in nos_list]
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    self.context.RemoveNode(node.name)

    self.rpc.call_node_leave_cluster(node.name)

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(
    "name", "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "serial_no",
    "master_candidate",
    "master",
    "offline",
    "drained",
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.failed and nodeinfo.data:
          nodeinfo = nodeinfo.data
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      instancelist = self.cfg.GetInstanceList()

      for instance_name in instancelist:
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field == "name":
          val = node.name
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "serial_no":
          val = node.serial_no
        elif field == "master_candidate":
          val = node.master_candidate
        elif field == "master":
          val = node.name == master_node
        elif field == "offline":
          val = node.offline
        elif field == "drained":
          val = node.drained
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      if node not in volumes or volumes[node].failed or not volumes[node].data:
        continue

      node_vols = volumes[node].data[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.HostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before")
        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one")
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one")

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port")

    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
    mc_now, _ = self.cfg.GetMasterCandidateStats()
    master_candidate = mc_now < cp_size

    self.new_node = objects.Node(name=node,
                                 primary_ip=primary_ip,
                                 secondary_ip=secondary_ip,
                                 master_candidate=master_candidate,
                                 offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise()
    if result.data:
      if constants.PROTOCOL_VERSION == result.data:
        logging.info("Communication to node %s fine, sw version %s match",
                     node, result.data)
      else:
        raise errors.OpExecError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.data))
    else:
      raise errors.OpExecError("Cannot get version from the new node")

    # setup ssh on node
    logging.info("Copy ssh key to node %s", node)
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    keyarray = []
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                priv_key, pub_key]

    for i in keyfiles:
      f = open(i, 'r')
      try:
        keyarray.append(f.read())
      finally:
        f.close()

    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                    keyarray[2],
                                    keyarray[3], keyarray[4], keyarray[5])

    msg = result.RemoteFailMsg()
    if msg:
      raise errors.OpExecError("Cannot transfer ssh keys to the"
                               " new node: %s" % msg)

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      if result.failed or not result.data:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      'nodelist': [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      if result[verifier].failed or not result[verifier].data:
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
                                 " for remote verification" % verifier)
      if result[verifier].data['nodelist']:
        for failed in result[verifier].data['nodelist']:
          feedback_fn("ssh/hostname verification failed %s -> %s" %
                      (verifier, result[verifier].data['nodelist'][failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node)


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification")
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time")

  def ExpandNames(self):
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested node state changes for consistency
    (master candidate pool size, offline/drained conflicts).

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if ((self.op.master_candidate == False or self.op.offline == True or
         self.op.drained == True) and node.master_candidate):
      # we will demote the node from master_candidate
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master node has to be a"
                                   " master candidate, online and not drained")
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
      if num_candidates <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
        if self.op.force:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name)

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.RemoteFailMsg()
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set")

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    msg = result.RemoteFailMsg()
    if msg:
      raise errors.OpExecError("Failed to schedule the reboot: %s" % msg)
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": constants.OS_API_VERSION,
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.default_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
                        for hypervisor in cluster.enabled_hypervisors]),
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @return: a tuple of (disks_ok, device_info), where device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.RemoteFailMsg()
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.RemoteFailMsg()
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
    device_info.append((instance.primary_node, inst_disk.iv_name,
                        result.payload))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
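
# Minimal sketch of how a caller consumes the return value of
# _AssembleInstanceDisks above (the feedback_fn call is hypothetical):
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for host, iv_name, node_dev in device_info:
#     feedback_fn("  %s on %s is visible as %s" % (iv_name, host, node_dev))

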
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
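
# Usage sketch for _StartInstanceDisks above, mirroring the call made in
# LUStartupInstance.Exec below; "force" comes straight from the opcode:
#
#   _StartInstanceDisks(self, instance, self.op.force)

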
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  ins_l = lu.rpc.call_instance_list([instance.primary_node],
                                      [instance.hypervisor])
  ins_l = ins_l[instance.primary_node]
  if ins_l.failed or not isinstance(ins_l.data, list):
    raise errors.OpExecError("Can't contact node '%s'" %
                             instance.primary_node)

  if instance.name in ins_l.data:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  errors on any other node always cause a failure result.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.RemoteFailMsg()
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
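
# Hedged usage sketch for _ShutdownInstanceDisks above (the caller is
# hypothetical): code that must clean up even when the primary node is
# unreachable can ask for primary-node errors to be ignored.
#
#   if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
#     self.proc.LogWarning("Some disks could not be shut down cleanly")

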
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise()
  free_mem = nodeinfo[node].data.get('memory_free')
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                             " was '%s'" % (node, free_mem))
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                             " needed %s MiB, available %s MiB" %
                             (node, reason, requested, free_mem))
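
# Typical call shape for _CheckNodeFreeMemory above, mirroring its use in
# LUStartupInstance below (bep is the filled beparams dict of the instance):
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)

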
class LUStartupInstance(LogicalUnit):
2844
  """Starts an instance.
2845

2846
  """
2847
  HPATH = "instance-start"
2848
  HTYPE = constants.HTYPE_INSTANCE
2849
  _OP_REQP = ["instance_name", "force"]
2850
  REQ_BGL = False
2851

    
2852
  def ExpandNames(self):
2853
    self._ExpandAndLockInstance()
2854

    
2855
  def BuildHooksEnv(self):
2856
    """Build hooks env.
2857

2858
    This runs on master, primary and secondary nodes of the instance.
2859

2860
    """
2861
    env = {
2862
      "FORCE": self.op.force,
2863
      }
2864
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2865
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2866
    return env, nl, nl
2867

    
2868
  def CheckPrereq(self):
2869
    """Check prerequisites.
2870

2871
    This checks that the instance is in the cluster.
2872

2873
    """
2874
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2875
    assert self.instance is not None, \
2876
      "Cannot retrieve locked instance %s" % self.op.instance_name
2877

    
2878
    # extra beparams
2879
    self.beparams = getattr(self.op, "beparams", {})
2880
    if self.beparams:
2881
      if not isinstance(self.beparams, dict):
2882
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
2883
                                   " dict" % (type(self.beparams), ))
2884
      # fill the beparams dict
2885
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
2886
      self.op.beparams = self.beparams
2887

    
2888
    # extra hvparams
2889
    self.hvparams = getattr(self.op, "hvparams", {})
2890
    if self.hvparams:
2891
      if not isinstance(self.hvparams, dict):
2892
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
2893
                                   " dict" % (type(self.hvparams), ))
2894

    
2895
      # check hypervisor parameter syntax (locally)
2896
      cluster = self.cfg.GetClusterInfo()
2897
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
2898
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
2899
                                    instance.hvparams)
2900
      filled_hvp.update(self.hvparams)
2901
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
2902
      hv_type.CheckParameterSyntax(filled_hvp)
2903
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
2904
      self.op.hvparams = self.hvparams
2905

    
2906
    _CheckNodeOnline(self, instance.primary_node)
2907

    
2908
    bep = self.cfg.GetClusterInfo().FillBE(instance)
2909
    # check bridges existence
2910
    _CheckInstanceBridgesExist(self, instance)
2911

    
2912
    remote_info = self.rpc.call_instance_info(instance.primary_node,
2913
                                              instance.name,
2914
                                              instance.hypervisor)
2915
    remote_info.Raise()
2916
    if not remote_info.data:
2917
      _CheckNodeFreeMemory(self, instance.primary_node,
2918
                           "starting instance %s" % instance.name,
2919
                           bep[constants.BE_MEMORY], instance.hypervisor)
2920

    
2921
  def Exec(self, feedback_fn):
2922
    """Start the instance.
2923

2924
    """
2925
    instance = self.instance
2926
    force = self.op.force
2927

    
2928
    self.cfg.MarkInstanceUp(instance.name)
2929

    
2930
    node_current = instance.primary_node
2931

    
2932
    _StartInstanceDisks(self, instance, force)
2933

    
2934
    result = self.rpc.call_instance_start(node_current, instance,
2935
                                          self.hvparams, self.beparams)
2936
    msg = result.RemoteFailMsg()
2937
    if msg:
2938
      _ShutdownInstanceDisks(self, instance)
2939
      raise errors.OpExecError("Could not start instance: %s" % msg)
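# Illustrative sketch for LUStartupInstance.CheckPrereq above (hypothetical
# parameter names): temporary hvparams given on the opcode are layered on top
# of the cluster and instance values before validation, i.e.
#   filled_hvp = objects.FillDict({"a": 1, "b": 2},  # cluster defaults
#                                 {"b": 3})          # instance overrides
#   filled_hvp.update({"c": 4})                      # per-start overrides
# yields {"a": 1, "b": 3, "c": 4}, which is what CheckParameterSyntax and
# _CheckHVParams then validate. The extra beparams/hvparams are only passed
# to call_instance_start for this operation; they are not written back to
# the instance configuration here.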
class LURebootInstance(LogicalUnit):
2943
  """Reboot an instance.
2944

2945
  """
2946
  HPATH = "instance-reboot"
2947
  HTYPE = constants.HTYPE_INSTANCE
2948
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
2949
  REQ_BGL = False
2950

    
2951
  def ExpandNames(self):
2952
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
2953
                                   constants.INSTANCE_REBOOT_HARD,
2954
                                   constants.INSTANCE_REBOOT_FULL]:
2955
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
2956
                                  (constants.INSTANCE_REBOOT_SOFT,
2957
                                   constants.INSTANCE_REBOOT_HARD,
2958
                                   constants.INSTANCE_REBOOT_FULL))
2959
    self._ExpandAndLockInstance()
2960

    
2961
  def BuildHooksEnv(self):
2962
    """Build hooks env.
2963

2964
    This runs on master, primary and secondary nodes of the instance.
2965

2966
    """
2967
    env = {
2968
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
2969
      "REBOOT_TYPE": self.op.reboot_type,
2970
      }
2971
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2972
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2973
    return env, nl, nl
2974

    
2975
  def CheckPrereq(self):
2976
    """Check prerequisites.
2977

2978
    This checks that the instance is in the cluster.
2979

2980
    """
2981
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2982
    assert self.instance is not None, \
2983
      "Cannot retrieve locked instance %s" % self.op.instance_name
2984

    
2985
    _CheckNodeOnline(self, instance.primary_node)
2986

    
2987
    # check bridges existence
2988
    _CheckInstanceBridgesExist(self, instance)
2989

    
2990
  def Exec(self, feedback_fn):
2991
    """Reboot the instance.
2992

2993
    """
2994
    instance = self.instance
2995
    ignore_secondaries = self.op.ignore_secondaries
2996
    reboot_type = self.op.reboot_type
2997

    
2998
    node_current = instance.primary_node
2999

    
3000
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3001
                       constants.INSTANCE_REBOOT_HARD]:
3002
      for disk in instance.disks:
3003
        self.cfg.SetDiskID(disk, node_current)
3004
      result = self.rpc.call_instance_reboot(node_current, instance,
3005
                                             reboot_type)
3006
      msg = result.RemoteFailMsg()
3007
      if msg:
3008
        raise errors.OpExecError("Could not reboot instance: %s" % msg)
3009
    else:
3010
      result = self.rpc.call_instance_shutdown(node_current, instance)
3011
      msg = result.RemoteFailMsg()
3012
      if msg:
3013
        raise errors.OpExecError("Could not shutdown instance for"
3014
                                 " full reboot: %s" % msg)
3015
      _ShutdownInstanceDisks(self, instance)
3016
      _StartInstanceDisks(self, instance, ignore_secondaries)
3017
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3018
      msg = result.RemoteFailMsg()
3019
      if msg:
3020
        _ShutdownInstanceDisks(self, instance)
3021
        raise errors.OpExecError("Could not start instance for"
3022
                                 " full reboot: %s" % msg)
3023

    
3024
    self.cfg.MarkInstanceUp(instance.name)
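# Summary of LURebootInstance.Exec above, as implemented:
#   INSTANCE_REBOOT_SOFT / INSTANCE_REBOOT_HARD: a single call_instance_reboot
#       RPC on the primary node (after setting the disk IDs).
#   INSTANCE_REBOOT_FULL: shut the instance down, cycle its disks via
#       _ShutdownInstanceDisks/_StartInstanceDisks, then call_instance_start.
# In all cases the instance is marked up in the configuration afterwards.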
class LUShutdownInstance(LogicalUnit):
3028
  """Shutdown an instance.
3029

3030
  """
3031
  HPATH = "instance-stop"
3032
  HTYPE = constants.HTYPE_INSTANCE
3033
  _OP_REQP = ["instance_name"]
3034
  REQ_BGL = False
3035

    
3036
  def ExpandNames(self):
3037
    self._ExpandAndLockInstance()
3038

    
3039
  def BuildHooksEnv(self):
3040
    """Build hooks env.
3041

3042
    This runs on master, primary and secondary nodes of the instance.
3043

3044
    """
3045
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3046
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3047
    return env, nl, nl
3048

    
3049
  def CheckPrereq(self):
3050
    """Check prerequisites.
3051

3052
    This checks that the instance is in the cluster.
3053

3054
    """
3055
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3056
    assert self.instance is not None, \
3057
      "Cannot retrieve locked instance %s" % self.op.instance_name
3058
    _CheckNodeOnline(self, self.instance.primary_node)
3059

    
3060
  def Exec(self, feedback_fn):
3061
    """Shutdown the instance.
3062

3063
    """
3064
    instance = self.instance
3065
    node_current = instance.primary_node
3066
    self.cfg.MarkInstanceDown(instance.name)
3067
    result = self.rpc.call_instance_shutdown(node_current, instance)
3068
    msg = result.RemoteFailMsg()
3069
    if msg:
3070
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3071

    
3072
    _ShutdownInstanceDisks(self, instance)
3073

    
3074

    
3075
class LUReinstallInstance(LogicalUnit):
3076
  """Reinstall an instance.
3077

3078
  """
3079
  HPATH = "instance-reinstall"
3080
  HTYPE = constants.HTYPE_INSTANCE
3081
  _OP_REQP = ["instance_name"]
3082
  REQ_BGL = False
3083

    
3084
  def ExpandNames(self):
3085
    self._ExpandAndLockInstance()
3086

    
3087
  def BuildHooksEnv(self):
3088
    """Build hooks env.
3089

3090
    This runs on master, primary and secondary nodes of the instance.
3091

3092
    """
3093
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3094
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3095
    return env, nl, nl
3096

    
3097
  def CheckPrereq(self):
3098
    """Check prerequisites.
3099

3100
    This checks that the instance is in the cluster and is not running.
3101

3102
    """
3103
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3104
    assert instance is not None, \
3105
      "Cannot retrieve locked instance %s" % self.op.instance_name
3106
    _CheckNodeOnline(self, instance.primary_node)
3107

    
3108
    if instance.disk_template == constants.DT_DISKLESS:
3109
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3110
                                 self.op.instance_name)
3111
    if instance.admin_up:
3112
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3113
                                 self.op.instance_name)
3114
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3115
                                              instance.name,
3116
                                              instance.hypervisor)
3117
    remote_info.Raise()
3118
    if remote_info.data:
3119
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3120
                                 (self.op.instance_name,
3121
                                  instance.primary_node))
3122

    
3123
    self.op.os_type = getattr(self.op, "os_type", None)
3124
    if self.op.os_type is not None:
3125
      # OS verification
3126
      pnode = self.cfg.GetNodeInfo(
3127
        self.cfg.ExpandNodeName(instance.primary_node))
3128
      if pnode is None:
3129
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3130
                                   instance.primary_node)
3131
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3132
      result.Raise()
3133
      if not isinstance(result.data, objects.OS):
3134
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
3135
                                   " primary node"  % self.op.os_type)
3136

    
3137
    self.instance = instance
3138

    
3139
  def Exec(self, feedback_fn):
3140
    """Reinstall the instance.
3141

3142
    """
3143
    inst = self.instance
3144

    
3145
    if self.op.os_type is not None:
3146
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3147
      inst.os = self.op.os_type
3148
      self.cfg.Update(inst)
3149

    
3150
    _StartInstanceDisks(self, inst, None)
3151
    try:
3152
      feedback_fn("Running the instance OS create scripts...")
3153
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3154
      msg = result.RemoteFailMsg()
3155
      if msg:
3156
        raise errors.OpExecError("Could not install OS for instance %s"
3157
                                 " on node %s: %s" %
3158
                                 (inst.name, inst.primary_node, msg))
3159
    finally:
3160
      _ShutdownInstanceDisks(self, inst)
3161

    
3162

    
3163
class LURenameInstance(LogicalUnit):
3164
  """Rename an instance.
3165

3166
  """
3167
  HPATH = "instance-rename"
3168
  HTYPE = constants.HTYPE_INSTANCE
3169
  _OP_REQP = ["instance_name", "new_name"]
3170

    
3171
  def BuildHooksEnv(self):
3172
    """Build hooks env.
3173

3174
    This runs on master, primary and secondary nodes of the instance.
3175

3176
    """
3177
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3178
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3179
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3180
    return env, nl, nl
3181

    
3182
  def CheckPrereq(self):
3183
    """Check prerequisites.
3184

3185
    This checks that the instance is in the cluster and is not running.
3186

3187
    """
3188
    instance = self.cfg.GetInstanceInfo(
3189
      self.cfg.ExpandInstanceName(self.op.instance_name))
3190
    if instance is None:
3191
      raise errors.OpPrereqError("Instance '%s' not known" %
3192
                                 self.op.instance_name)
3193
    _CheckNodeOnline(self, instance.primary_node)
3194

    
3195
    if instance.admin_up:
3196
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3197
                                 self.op.instance_name)
3198
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3199
                                              instance.name,
3200
                                              instance.hypervisor)
3201
    remote_info.Raise()
3202
    if remote_info.data:
3203
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3204
                                 (self.op.instance_name,
3205
                                  instance.primary_node))
3206
    self.instance = instance
3207

    
3208
    # new name verification
3209
    name_info = utils.HostInfo(self.op.new_name)
3210

    
3211
    self.op.new_name = new_name = name_info.name
3212
    instance_list = self.cfg.GetInstanceList()
3213
    if new_name in instance_list:
3214
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3215
                                 new_name)
3216

    
3217
    if not getattr(self.op, "ignore_ip", False):
3218
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3219
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3220
                                   (name_info.ip, new_name))
3221

    
3222

    
3223
  def Exec(self, feedback_fn):
3224
    """Reinstall the instance.
3225

3226
    """
3227
    inst = self.instance
3228
    old_name = inst.name
3229

    
3230
    if inst.disk_template == constants.DT_FILE:
3231
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3232

    
3233
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3234
    # Change the instance lock. This is definitely safe while we hold the BGL
3235
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3236
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3237

    
3238
    # re-read the instance from the configuration after rename
3239
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3240

    
3241
    if inst.disk_template == constants.DT_FILE:
3242
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3243
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3244
                                                     old_file_storage_dir,
3245
                                                     new_file_storage_dir)
3246
      result.Raise()
3247
      if not result.data:
3248
        raise errors.OpExecError("Could not connect to node '%s' to rename"
3249
                                 " directory '%s' to '%s' (but the instance"
3250
                                 " has been renamed in Ganeti)" % (
3251
                                 inst.primary_node, old_file_storage_dir,
3252
                                 new_file_storage_dir))
3253

    
3254
      if not result.data[0]:
3255
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
3256
                                 " (but the instance has been renamed in"
3257
                                 " Ganeti)" % (old_file_storage_dir,
3258
                                               new_file_storage_dir))
3259

    
3260
    _StartInstanceDisks(self, inst, None)
3261
    try:
3262
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3263
                                                 old_name)
3264
      msg = result.RemoteFailMsg()
3265
      if msg:
3266
        msg = ("Could not run OS rename script for instance %s on node %s"
3267
               " (but the instance has been renamed in Ganeti): %s" %
3268
               (inst.name, inst.primary_node, msg))
3269
        self.proc.LogWarning(msg)
3270
    finally:
3271
      _ShutdownInstanceDisks(self, inst)
3272

    
3273

    
3274
class LURemoveInstance(LogicalUnit):
3275
  """Remove an instance.
3276

3277
  """
3278
  HPATH = "instance-remove"
3279
  HTYPE = constants.HTYPE_INSTANCE
3280
  _OP_REQP = ["instance_name", "ignore_failures"]
3281
  REQ_BGL = False
3282

    
3283
  def ExpandNames(self):
3284
    self._ExpandAndLockInstance()
3285
    self.needed_locks[locking.LEVEL_NODE] = []
3286
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3287

    
3288
  def DeclareLocks(self, level):
3289
    if level == locking.LEVEL_NODE:
3290
      self._LockInstancesNodes()
3291

    
3292
  def BuildHooksEnv(self):
3293
    """Build hooks env.
3294

3295
    This runs on master, primary and secondary nodes of the instance.
3296

3297
    """
3298
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3299
    nl = [self.cfg.GetMasterNode()]
3300
    return env, nl, nl
3301

    
3302
  def CheckPrereq(self):
3303
    """Check prerequisites.
3304

3305
    This checks that the instance is in the cluster.
3306

3307
    """
3308
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3309
    assert self.instance is not None, \
3310
      "Cannot retrieve locked instance %s" % self.op.instance_name
3311

    
3312
  def Exec(self, feedback_fn):
3313
    """Remove the instance.
3314

3315
    """
3316
    instance = self.instance
3317
    logging.info("Shutting down instance %s on node %s",
3318
                 instance.name, instance.primary_node)
3319

    
3320
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3321
    msg = result.RemoteFailMsg()
3322
    if msg:
3323
      if self.op.ignore_failures:
3324
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3325
      else:
3326
        raise errors.OpExecError("Could not shutdown instance %s on"
3327
                                 " node %s: %s" %
3328
                                 (instance.name, instance.primary_node, msg))
3329

    
3330
    logging.info("Removing block devices for instance %s", instance.name)
3331

    
3332
    if not _RemoveDisks(self, instance):
3333
      if self.op.ignore_failures:
3334
        feedback_fn("Warning: can't remove instance's disks")
3335
      else:
3336
        raise errors.OpExecError("Can't remove instance's disks")
3337

    
3338
    logging.info("Removing instance %s out of cluster config", instance.name)
3339

    
3340
    self.cfg.RemoveInstance(instance.name)
3341
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3342

    
3343

    
3344
class LUQueryInstances(NoHooksLU):
3345
  """Logical unit for querying instances.
3346

3347
  """
3348
  _OP_REQP = ["output_fields", "names", "use_locking"]
3349
  REQ_BGL = False
3350
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3351
                                    "admin_state",
3352
                                    "disk_template", "ip", "mac", "bridge",
3353
                                    "sda_size", "sdb_size", "vcpus", "tags",
3354
                                    "network_port", "beparams",
3355
                                    r"(disk)\.(size)/([0-9]+)",
3356
                                    r"(disk)\.(sizes)", "disk_usage",
3357
                                    r"(nic)\.(mac|ip|bridge)/([0-9]+)",
3358
                                    r"(nic)\.(macs|ips|bridges)",
3359
                                    r"(disk|nic)\.(count)",
3360
                                    "serial_no", "hypervisor", "hvparams",] +
3361
                                  ["hv/%s" % name
3362
                                   for name in constants.HVS_PARAMETERS] +
3363
                                  ["be/%s" % name
3364
                                   for name in constants.BES_PARAMETERS])
3365
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3366

    
3367

    
3368
  def ExpandNames(self):
3369
    _CheckOutputFields(static=self._FIELDS_STATIC,
3370
                       dynamic=self._FIELDS_DYNAMIC,
3371
                       selected=self.op.output_fields)
3372

    
3373
    self.needed_locks = {}
3374
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3375
    self.share_locks[locking.LEVEL_NODE] = 1
3376

    
3377
    if self.op.names:
3378
      self.wanted = _GetWantedInstances(self, self.op.names)
3379
    else:
3380
      self.wanted = locking.ALL_SET
3381

    
3382
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3383
    self.do_locking = self.do_node_query and self.op.use_locking
3384
    if self.do_locking:
3385
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3386
      self.needed_locks[locking.LEVEL_NODE] = []
3387
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3388

    
3389
  def DeclareLocks(self, level):
3390
    if level == locking.LEVEL_NODE and self.do_locking:
3391
      self._LockInstancesNodes()
3392

    
3393
  def CheckPrereq(self):
3394
    """Check prerequisites.
3395

3396
    """
3397
    pass
3398

    
3399
  def Exec(self, feedback_fn):
3400
    """Computes the list of nodes and their attributes.
3401

3402
    """
3403
    all_info = self.cfg.GetAllInstancesInfo()
3404
    if self.wanted == locking.ALL_SET:
3405
      # caller didn't specify instance names, so ordering is not important
3406
      if self.do_locking:
3407
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3408
      else:
3409
        instance_names = all_info.keys()
3410
      instance_names = utils.NiceSort(instance_names)
3411
    else:
3412
      # caller did specify names, so we must keep the ordering
3413
      if self.do_locking:
3414
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3415
      else:
3416
        tgt_set = all_info.keys()
3417
      missing = set(self.wanted).difference(tgt_set)
3418
      if missing:
3419
        raise errors.OpExecError("Some instances were removed before"
3420
                                 " retrieving their data: %s" % missing)
3421
      instance_names = self.wanted
3422

    
3423
    instance_list = [all_info[iname] for iname in instance_names]
3424

    
3425
    # begin data gathering
3426

    
3427
    nodes = frozenset([inst.primary_node for inst in instance_list])
3428
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3429

    
3430
    bad_nodes = []
3431
    off_nodes = []
3432
    if self.do_node_query:
3433
      live_data = {}
3434
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3435
      for name in nodes:
3436
        result = node_data[name]
3437
        if result.offline:
3438
          # offline nodes will be in both lists
3439
          off_nodes.append(name)
3440
        if result.failed:
3441
          bad_nodes.append(name)
3442
        else:
3443
          if result.data:
3444
            live_data.update(result.data)
3445
            # else no instance is alive
3446
    else:
3447
      live_data = dict([(name, {}) for name in instance_names])
3448

    
3449
    # end data gathering
3450

    
3451
    HVPREFIX = "hv/"
3452
    BEPREFIX = "be/"
3453
    output = []
3454
    for instance in instance_list:
3455
      iout = []
3456
      i_hv = self.cfg.GetClusterInfo().FillHV(instance)
3457
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
3458
      for field in self.op.output_fields:
3459
        st_match = self._FIELDS_STATIC.Matches(field)
3460
        if field == "name":
3461
          val = instance.name
3462
        elif field == "os":
3463
          val = instance.os
3464
        elif field == "pnode":
3465
          val = instance.primary_node
3466
        elif field == "snodes":
3467
          val = list(instance.secondary_nodes)
3468
        elif field == "admin_state":
3469
          val = instance.admin_up
3470
        elif field == "oper_state":
3471
          if instance.primary_node in bad_nodes:
3472
            val = None
3473
          else:
3474
            val = bool(live_data.get(instance.name))
3475
        elif field == "status":
3476
          if instance.primary_node in off_nodes:
3477
            val = "ERROR_nodeoffline"
3478
          elif instance.primary_node in bad_nodes:
3479
            val = "ERROR_nodedown"
3480
          else:
3481
            running = bool(live_data.get(instance.name))
3482
            if running:
3483
              if instance.admin_up:
3484
                val = "running"
3485
              else:
3486
                val = "ERROR_up"
3487
            else:
3488
              if instance.admin_up:
3489
                val = "ERROR_down"
3490
              else:
3491
                val = "ADMIN_down"
3492
        elif field == "oper_ram":
3493
          if instance.primary_node in bad_nodes:
3494
            val = None
3495
          elif instance.name in live_data:
3496
            val = live_data[instance.name].get("memory", "?")
3497
          else:
3498
            val = "-"
3499
        elif field == "disk_template":
3500
          val = instance.disk_template
3501
        elif field == "ip":
3502
          val = instance.nics[0].ip
3503
        elif field == "bridge":
3504
          val = instance.nics[0].bridge
3505
        elif field == "mac":
3506
          val = instance.nics[0].mac
3507
        elif field == "sda_size" or field == "sdb_size":
3508
          idx = ord(field[2]) - ord('a')
3509
          try:
3510
            val = instance.FindDisk(idx).size
3511
          except errors.OpPrereqError:
3512
            val = None
3513
        elif field == "disk_usage": # total disk usage per node
3514
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
3515
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3516
        elif field == "tags":
3517
          val = list(instance.GetTags())
3518
        elif field == "serial_no":
3519
          val = instance.serial_no
3520
        elif field == "network_port":
3521
          val = instance.network_port
3522
        elif field == "hypervisor":
3523
          val = instance.hypervisor
3524
        elif field == "hvparams":
3525
          val = i_hv
3526
        elif (field.startswith(HVPREFIX) and
3527
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3528
          val = i_hv.get(field[len(HVPREFIX):], None)
3529
        elif field == "beparams":
3530
          val = i_be
3531
        elif (field.startswith(BEPREFIX) and
3532
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3533
          val = i_be.get(field[len(BEPREFIX):], None)
3534
        elif st_match and st_match.groups():
3535
          # matches a variable list
3536
          st_groups = st_match.groups()
3537
          if st_groups and st_groups[0] == "disk":
3538
            if st_groups[1] == "count":
3539
              val = len(instance.disks)
3540
            elif st_groups[1] == "sizes":
3541
              val = [disk.size for disk in instance.disks]
3542
            elif st_groups[1] == "size":
3543
              try:
3544
                val = instance.FindDisk(st_groups[2]).size
3545
              except errors.OpPrereqError:
3546
                val = None
3547
            else:
3548
              assert False, "Unhandled disk parameter"
3549
          elif st_groups[0] == "nic":
3550
            if st_groups[1] == "count":
3551
              val = len(instance.nics)
3552
            elif st_groups[1] == "macs":
3553
              val = [nic.mac for nic in instance.nics]
3554
            elif st_groups[1] == "ips":
3555
              val = [nic.ip for nic in instance.nics]
3556
            elif st_groups[1] == "bridges":
3557
              val = [nic.bridge for nic in instance.nics]
3558
            else:
3559
              # index-based item
3560
              nic_idx = int(st_groups[2])
3561
              if nic_idx >= len(instance.nics):
3562
                val = None
3563
              else:
3564
                if st_groups[1] == "mac":
3565
                  val = instance.nics[nic_idx].mac
3566
                elif st_groups[1] == "ip":
3567
                  val = instance.nics[nic_idx].ip
3568
                elif st_groups[1] == "bridge":
3569
                  val = instance.nics[nic_idx].bridge
3570
                else:
3571
                  assert False, "Unhandled NIC parameter"
3572
          else:
3573
            assert False, "Unhandled variable parameter"
3574
        else:
3575
          raise errors.ParameterError(field)
3576
        iout.append(val)
3577
      output.append(iout)
3578

    
3579
    return output
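# Illustrative sketch for LUQueryInstances above (hypothetical field list):
# with output_fields = ["name", "status", "disk.count", "disk.size/0",
# "nic.mac/1"], the static fields come straight from the configuration
# ("disk.size/0" is the size of the first disk, "nic.mac/1" the MAC of the
# second NIC), while a dynamic field such as "status" or "oper_ram" sets
# do_node_query and therefore triggers a call_all_instances_info RPC against
# the primary nodes; instances on unreachable or offline nodes then report
# ERROR_nodedown or ERROR_nodeoffline respectively.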
class LUFailoverInstance(LogicalUnit):
3583
  """Failover an instance.
3584

3585
  """
3586
  HPATH = "instance-failover"
3587
  HTYPE = constants.HTYPE_INSTANCE
3588
  _OP_REQP = ["instance_name", "ignore_consistency"]
3589
  REQ_BGL = False
3590

    
3591
  def ExpandNames(self):
3592
    self._ExpandAndLockInstance()
3593
    self.needed_locks[locking.LEVEL_NODE] = []
3594
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3595

    
3596
  def DeclareLocks(self, level):
3597
    if level == locking.LEVEL_NODE:
3598
      self._LockInstancesNodes()
3599

    
3600
  def BuildHooksEnv(self):
3601
    """Build hooks env.
3602

3603
    This runs on master, primary and secondary nodes of the instance.
3604

3605
    """
3606
    env = {
3607
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3608
      }
3609
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3610
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3611
    return env, nl, nl
3612

    
3613
  def CheckPrereq(self):
3614
    """Check prerequisites.
3615

3616
    This checks that the instance is in the cluster.
3617

3618
    """
3619
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3620
    assert self.instance is not None, \
3621
      "Cannot retrieve locked instance %s" % self.op.instance_name
3622

    
3623
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3624
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3625
      raise errors.OpPrereqError("Instance's disk layout is not"
3626
                                 " network mirrored, cannot failover.")
3627

    
3628
    secondary_nodes = instance.secondary_nodes
3629
    if not secondary_nodes:
3630
      raise errors.ProgrammerError("no secondary node but using "
3631
                                   "a mirrored disk template")
3632

    
3633
    target_node = secondary_nodes[0]
3634
    _CheckNodeOnline(self, target_node)
3635
    _CheckNodeNotDrained(self, target_node)
3636
    # check memory requirements on the secondary node
3637
    _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
3638
                         instance.name, bep[constants.BE_MEMORY],
3639
                         instance.hypervisor)
3640
    # check bridge existence
3641
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3642

    
3643
  def Exec(self, feedback_fn):
3644
    """Failover an instance.
3645

3646
    The failover is done by shutting it down on its present node and
3647
    starting it on the secondary.
3648

3649
    """
3650
    instance = self.instance
3651

    
3652
    source_node = instance.primary_node
3653
    target_node = instance.secondary_nodes[0]
3654

    
3655
    feedback_fn("* checking disk consistency between source and target")
3656
    for dev in instance.disks:
3657
      # for drbd, these are drbd over lvm
3658
      if not _CheckDiskConsistency(self, dev, target_node, False):
3659
        if instance.admin_up and not self.op.ignore_consistency:
3660
          raise errors.OpExecError("Disk %s is degraded on target node,"
3661
                                   " aborting failover." % dev.iv_name)
3662

    
3663
    feedback_fn("* shutting down instance on source node")
3664
    logging.info("Shutting down instance %s on node %s",
3665
                 instance.name, source_node)
3666

    
3667
    result = self.rpc.call_instance_shutdown(source_node, instance)
3668
    msg = result.RemoteFailMsg()
3669
    if msg:
3670
      if self.op.ignore_consistency:
3671
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
3672
                             " Proceeding anyway. Please make sure node"
3673
                             " %s is down. Error details: %s",
3674
                             instance.name, source_node, source_node, msg)
3675
      else:
3676
        raise errors.OpExecError("Could not shutdown instance %s on"
3677
                                 " node %s: %s" %
3678
                                 (instance.name, source_node, msg))
3679

    
3680
    feedback_fn("* deactivating the instance's disks on source node")
3681
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
3682
      raise errors.OpExecError("Can't shut down the instance's disks.")
3683

    
3684
    instance.primary_node = target_node
3685
    # distribute new instance config to the other nodes
3686
    self.cfg.Update(instance)
3687

    
3688
    # Only start the instance if it's marked as up
3689
    if instance.admin_up:
3690
      feedback_fn("* activating the instance's disks on target node")
3691
      logging.info("Starting instance %s on node %s",
3692
                   instance.name, target_node)
3693

    
3694
      disks_ok, dummy = _AssembleInstanceDisks(self, instance,
3695
                                               ignore_secondaries=True)
3696
      if not disks_ok:
3697
        _ShutdownInstanceDisks(self, instance)
3698
        raise errors.OpExecError("Can't activate the instance's disks")
3699

    
3700
      feedback_fn("* starting the instance on the target node")
3701
      result = self.rpc.call_instance_start(target_node, instance, None, None)
3702
      msg = result.RemoteFailMsg()
3703
      if msg:
3704
        _ShutdownInstanceDisks(self, instance)
3705
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
3706
                                 (instance.name, target_node, msg))
3707

    
3708

    
3709
class LUMigrateInstance(LogicalUnit):
3710
  """Migrate an instance.
3711

3712
  This is migration without shutting down, compared to the failover,
3713
  which is done with shutdown.
3714

3715
  """
3716
  HPATH = "instance-migrate"
3717
  HTYPE = constants.HTYPE_INSTANCE
3718
  _OP_REQP = ["instance_name", "live", "cleanup"]
3719

    
3720
  REQ_BGL = False
3721

    
3722
  def ExpandNames(self):
3723
    self._ExpandAndLockInstance()
3724
    self.needed_locks[locking.LEVEL_NODE] = []
3725
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3726

    
3727
  def DeclareLocks(self, level):
3728
    if level == locking.LEVEL_NODE:
3729
      self._LockInstancesNodes()
3730

    
3731
  def BuildHooksEnv(self):
3732
    """Build hooks env.
3733

3734
    This runs on master, primary and secondary nodes of the instance.
3735

3736
    """
3737
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3738
    env["MIGRATE_LIVE"] = self.op.live
3739
    env["MIGRATE_CLEANUP"] = self.op.cleanup
3740
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3741
    return env, nl, nl
3742

    
3743
  def CheckPrereq(self):
3744
    """Check prerequisites.
3745

3746
    This checks that the instance is in the cluster.
3747

3748
    """
3749
    instance = self.cfg.GetInstanceInfo(
3750
      self.cfg.ExpandInstanceName(self.op.instance_name))
3751
    if instance is None:
3752
      raise errors.OpPrereqError("Instance '%s' not known" %
3753
                                 self.op.instance_name)
3754

    
3755
    if instance.disk_template != constants.DT_DRBD8:
3756
      raise errors.OpPrereqError("Instance's disk layout is not"
3757
                                 " drbd8, cannot migrate.")
3758

    
3759
    secondary_nodes = instance.secondary_nodes
3760
    if not secondary_nodes:
3761
      raise errors.ConfigurationError("No secondary node but using"
3762
                                      " drbd8 disk template")
3763

    
3764
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
3765

    
3766
    target_node = secondary_nodes[0]
3767
    # check memory requirements on the secondary node
3768
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
3769
                         instance.name, i_be[constants.BE_MEMORY],
3770
                         instance.hypervisor)
3771

    
3772
    # check bridge existence
3773
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3774

    
3775
    if not self.op.cleanup:
3776
      _CheckNodeNotDrained(self, target_node)
3777
      result = self.rpc.call_instance_migratable(instance.primary_node,
3778
                                                 instance)
3779
      msg = result.RemoteFailMsg()
3780
      if msg:
3781
        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
3782
                                   msg)
3783

    
3784
    self.instance = instance
3785

    
3786
  def _WaitUntilSync(self):
3787
    """Poll with custom rpc for disk sync.
3788

3789
    This uses our own step-based rpc call.
3790

3791
    """
3792
    self.feedback_fn("* wait until resync is done")
3793
    all_done = False
3794
    while not all_done:
3795
      all_done = True
3796
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
3797
                                            self.nodes_ip,
3798
                                            self.instance.disks)
3799
      min_percent = 100
3800
      for node, nres in result.items():
3801
        msg = nres.RemoteFailMsg()
3802
        if msg:
3803
          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
3804
                                   (node, msg))
3805
        node_done, node_percent = nres.payload
3806
        all_done = all_done and node_done
3807
        if node_percent is not None:
3808
          min_percent = min(min_percent, node_percent)
3809
      if not all_done:
3810
        if min_percent < 100:
3811
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
3812
        time.sleep(2)
3813

    
3814
  def _EnsureSecondary(self, node):
3815
    """Demote a node to secondary.
3816

3817
    """
3818
    self.feedback_fn("* switching node %s to secondary mode" % node)
3819

    
3820
    for dev in self.instance.disks:
3821
      self.cfg.SetDiskID(dev, node)
3822

    
3823
    result = self.rpc.call_blockdev_close(node, self.instance.name,
3824
                                          self.instance.disks)
3825
    msg = result.RemoteFailMsg()
3826
    if msg:
3827
      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
3828
                               " error %s" % (node, msg))
3829

    
3830
  def _GoStandalone(self):
3831
    """Disconnect from the network.
3832

3833
    """
3834
    self.feedback_fn("* changing into standalone mode")
3835
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
3836
                                               self.instance.disks)
3837
    for node, nres in result.items():
3838
      msg = nres.RemoteFailMsg()
3839
      if msg:
3840
        raise errors.OpExecError("Cannot disconnect disks node %s,"
3841
                                 " error %s" % (node, msg))
3842

    
3843
  def _GoReconnect(self, multimaster):
3844
    """Reconnect to the network.
3845

3846
    """
3847
    if multimaster:
3848
      msg = "dual-master"
3849
    else:
3850
      msg = "single-master"
3851
    self.feedback_fn("* changing disks into %s mode" % msg)
3852
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
3853
                                           self.instance.disks,
3854
                                           self.instance.name, multimaster)
3855
    for node, nres in result.items():
3856
      msg = nres.RemoteFailMsg()
3857
      if msg:
3858
        raise errors.OpExecError("Cannot change disks config on node %s,"
3859
                                 " error: %s" % (node, msg))
3860

    
3861
  def _ExecCleanup(self):
3862
    """Try to cleanup after a failed migration.
3863

3864
    The cleanup is done by:
3865
      - check that the instance is running only on one node
3866
        (and update the config if needed)
3867
      - change disks on its secondary node to secondary
3868
      - wait until disks are fully synchronized
3869
      - disconnect from the network
3870
      - change disks into single-master mode
3871
      - wait again until disks are fully synchronized
3872

3873
    """
3874
    instance = self.instance
3875
    target_node = self.target_node
3876
    source_node = self.source_node
3877

    
3878
    # check running on only one node
3879
    self.feedback_fn("* checking where the instance actually runs"
3880
                     " (if this hangs, the hypervisor might be in"
3881
                     " a bad state)")
3882
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
3883
    for node, result in ins_l.items():
3884
      result.Raise()
3885
      if not isinstance(result.data, list):
3886
        raise errors.OpExecError("Can't contact node '%s'" % node)
3887

    
3888
    runningon_source = instance.name in ins_l[source_node].data
3889
    runningon_target = instance.name in ins_l[target_node].data
3890

    
3891
    if runningon_source and runningon_target:
3892
      raise errors.OpExecError("Instance seems to be running on two nodes,"
3893
                               " or the hypervisor is confused. You will have"
3894
                               " to ensure manually that it runs only on one"
3895
                               " and restart this operation.")
3896

    
3897
    if not (runningon_source or runningon_target):
3898
      raise errors.OpExecError("Instance does not seem to be running at all."
3899
                               " In this case, it's safer to repair by"
3900
                               " running 'gnt-instance stop' to ensure disk"
3901
                               " shutdown, and then restarting it.")
3902

    
3903
    if runningon_target:
3904
      # the migration has actually succeeded, we need to update the config
3905
      self.feedback_fn("* instance running on secondary node (%s),"
3906
                       " updating config" % target_node)
3907
      instance.primary_node = target_node
3908
      self.cfg.Update(instance)
3909
      demoted_node = source_node
3910
    else:
3911
      self.feedback_fn("* instance confirmed to be running on its"
3912
                       " primary node (%s)" % source_node)
3913
      demoted_node = target_node
3914

    
3915
    self._EnsureSecondary(demoted_node)
3916
    try:
3917
      self._WaitUntilSync()
3918
    except errors.OpExecError:
3919
      # we ignore here errors, since if the device is standalone, it
3920
      # won't be able to sync
3921
      pass
3922
    self._GoStandalone()
3923
    self._GoReconnect(False)
3924
    self._WaitUntilSync()
3925

    
3926
    self.feedback_fn("* done")
3927

    
3928
  def _RevertDiskStatus(self):
3929
    """Try to revert the disk status after a failed migration.
3930

3931
    """
3932
    target_node = self.target_node
3933
    try:
3934
      self._EnsureSecondary(target_node)
3935
      self._GoStandalone()
3936
      self._GoReconnect(False)
3937
      self._WaitUntilSync()
3938
    except errors.OpExecError, err:
3939
      self.LogWarning("Migration failed and I can't reconnect the"
3940
                      " drives: error '%s'\n"
3941
                      "Please look and recover the instance status" %
3942
                      str(err))
3943

    
3944
  def _AbortMigration(self):
3945
    """Call the hypervisor code to abort a started migration.
3946

3947
    """
3948
    instance = self.instance
3949
    target_node = self.target_node
3950
    migration_info = self.migration_info
3951

    
3952
    abort_result = self.rpc.call_finalize_migration(target_node,
3953
                                                    instance,
3954
                                                    migration_info,
3955
                                                    False)
3956
    abort_msg = abort_result.RemoteFailMsg()
3957
    if abort_msg:
3958
      logging.error("Aborting migration failed on target node %s: %s" %
3959
                    (target_node, abort_msg))
3960
      # Don't raise an exception here, as we still have to try to revert the
3961
      # disk status, even if this step failed.
3962

    
3963
  def _ExecMigration(self):
3964
    """Migrate an instance.
3965

3966
    The migrate is done by:
3967
      - change the disks into dual-master mode
3968
      - wait until disks are fully synchronized again
3969
      - migrate the instance
3970
      - change disks on the new secondary node (the old primary) to secondary
3971
      - wait until disks are fully synchronized
3972
      - change disks into single-master mode
3973

3974
    """
3975
    instance = self.instance
3976
    target_node = self.target_node
3977
    source_node = self.source_node
3978

    
3979
    self.feedback_fn("* checking disk consistency between source and target")
3980
    for dev in instance.disks:
3981
      if not _CheckDiskConsistency(self, dev, target_node, False):
3982
        raise errors.OpExecError("Disk %s is degraded or not fully"
3983
                                 " synchronized on target node,"
3984
                                 " aborting migrate." % dev.iv_name)
3985

    
3986
    # First get the migration information from the remote node
3987
    result = self.rpc.call_migration_info(source_node, instance)
3988
    msg = result.RemoteFailMsg()
3989
    if msg:
3990
      log_err = ("Failed fetching source migration information from %s: %s" %
3991
                 (source_node, msg))
3992
      logging.error(log_err)
3993
      raise errors.OpExecError(log_err)
3994

    
3995
    self.migration_info = migration_info = result.payload
3996

    
3997
    # Then switch the disks to master/master mode
3998
    self._EnsureSecondary(target_node)
3999
    self._GoStandalone()
4000
    self._GoReconnect(True)
4001
    self._WaitUntilSync()
4002

    
4003
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4004
    result = self.rpc.call_accept_instance(target_node,
4005
                                           instance,
4006
                                           migration_info,
4007
                                           self.nodes_ip[target_node])
4008

    
4009
    msg = result.RemoteFailMsg()
4010
    if msg:
4011
      logging.error("Instance pre-migration failed, trying to revert"
4012
                    " disk status: %s", msg)
4013
      self._AbortMigration()
4014
      self._RevertDiskStatus()
4015
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4016
                               (instance.name, msg))
4017

    
4018
    self.feedback_fn("* migrating instance to %s" % target_node)
4019
    time.sleep(10)
4020
    result = self.rpc.call_instance_migrate(source_node, instance,
4021
                                            self.nodes_ip[target_node],
4022
                                            self.op.live)
4023
    msg = result.RemoteFailMsg()
4024
    if msg:
4025
      logging.error("Instance migration failed, trying to revert"
4026
                    " disk status: %s", msg)
4027
      self._AbortMigration()
4028
      self._RevertDiskStatus()
4029
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4030
                               (instance.name, msg))
4031
    time.sleep(10)
4032

    
4033
    instance.primary_node = target_node
4034
    # distribute new instance config to the other nodes
4035
    self.cfg.Update(instance)
4036

    
4037
    result = self.rpc.call_finalize_migration(target_node,
4038
                                              instance,
4039
                                              migration_info,
4040
                                              True)
4041
    msg = result.RemoteFailMsg()
4042
    if msg:
4043
      logging.error("Instance migration succeeded, but finalization failed:"
4044
                    " %s" % msg)
4045
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4046
                               msg)
4047

    
4048
    self._EnsureSecondary(source_node)
4049
    self._WaitUntilSync()
4050
    self._GoStandalone()
4051
    self._GoReconnect(False)
4052
    self._WaitUntilSync()
4053

    
4054
    self.feedback_fn("* done")
4055

    
4056
  def Exec(self, feedback_fn):
4057
    """Perform the migration.
4058

4059
    """
4060
    self.feedback_fn = feedback_fn
4061

    
4062
    self.source_node = self.instance.primary_node
4063
    self.target_node = self.instance.secondary_nodes[0]
4064
    self.all_nodes = [self.source_node, self.target_node]
4065
    self.nodes_ip = {
4066
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4067
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4068
      }
4069
    if self.op.cleanup:
4070
      return self._ExecCleanup()
4071
    else:
4072
      return self._ExecMigration()
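# Note on LUMigrateInstance above: during a successful migration the DRBD
# disks go through the following mode changes, mirroring _ExecMigration:
#   _EnsureSecondary(target) -> _GoStandalone() -> _GoReconnect(True)
#   (dual-master) -> _WaitUntilSync() -> call_instance_migrate ->
#   _EnsureSecondary(old primary) -> _WaitUntilSync() -> _GoStandalone() ->
#   _GoReconnect(False) (single-master) -> _WaitUntilSync()
# The cleanup path (_ExecCleanup) only performs the second half, after first
# determining on which node the instance actually ended up running.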
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() returns True
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


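# Illustrative note for _CreateBlockDev above: the recursion descends into
# the children first, so for a DRBD8 device its two underlying LVs exist
# before the DRBD device is created on top of them. Callers such as
# _CreateDisks (further below) typically pass
#   f_create = node == instance.primary_node
#   _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
# so that on the primary node the whole tree is created and opened, while on
# other nodes only devices whose type reports CreateOnSecondary() (and their
# children) are created.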
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  msg = result.RemoteFailMsg()
  if msg:
    raise errors.OpExecError("Can't create block device %s on"
                             " node %s for instance %s: %s" %
                             (device, node, instance.name, msg))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


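# Illustrative sketch for _GenerateUniqueNames above (hypothetical values):
# called with exts=[".disk0", ".disk1"] it returns something like
#   ["<unique-id-1>.disk0", "<unique-id-2>.disk1"]
# where each id comes from cfg.GenerateUniqueID(); the caller only chooses
# the suffix, the generated prefix keeps LV names unique across the cluster.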
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


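# Sketch of the objects.Disk tree built by _GenerateDRBD8Branch above: the
# returned LD_DRBD8 device carries
#   logical_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
# and has two LD_LV children, a data volume of the requested size plus a
# 128 MiB metadata volume; this is the same per-disk 128 MiB overhead that
# _ComputeDiskSize (further below) accounts for with the DT_DRBD8 template.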
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % i
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


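# Illustrative sketch for _GenerateDiskTemplate above (hypothetical input):
# with template_name=constants.DT_PLAIN,
# disk_info=[{"size": 1024, "mode": "rw"}] and base_index=0 it returns a
# single LD_LV disk with iv_name "disk/0" and
# logical_id=(vgname, "<unique-id>.disk0"). For DT_DRBD8 each entry becomes
# a _GenerateDRBD8Branch tree instead, consuming two DRBD minors per disk,
# and for DT_FILE the logical_id points at "<file_storage_dir>/disk<index>"
# under the chosen file_driver.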
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


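# For example, an instance named "instance1.example.com" (hypothetical name)
# yields "originstname+instance1.example.com", which _CreateBlockDev then
# attaches to the instance's block devices as an LVM tag.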
def _CreateDisks(lu, instance):
4258
  """Create all disks for an instance.
4259

4260
  This abstracts away some work from AddInstance.
4261

4262
  @type lu: L{LogicalUnit}
4263
  @param lu: the logical unit on whose behalf we execute
4264
  @type instance: L{objects.Instance}
4265
  @param instance: the instance whose disks we should create
4266
  @rtype: boolean
4267
  @return: the success of the creation
4268

4269
  """
4270
  info = _GetInstanceInfoText(instance)
4271
  pnode = instance.primary_node
4272

    
4273
  if instance.disk_template == constants.DT_FILE:
4274
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4275
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
4276

    
4277
    if result.failed or not result.data:
4278
      raise errors.OpExecError("Could not connect to node '%s'" % pnode)
4279

    
4280
    if not result.data[0]:
4281
      raise errors.OpExecError("Failed to create directory '%s'" %
4282
                               file_storage_dir)
4283

    
4284
  # Note: this needs to be kept in sync with adding of disks in
4285
  # LUSetInstanceParams
4286
  for device in instance.disks:
4287
    logging.info("Creating volume %s for instance %s",
4288
                 device.iv_name, instance.name)
4289
    #HARDCODE
4290
    for node in instance.all_nodes:
4291
      f_create = node == pnode
4292
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
4293

    
4294

    
4295
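# Note on _CreateDisks above (added for clarity, not part of the original
# source): f_create is True only on the primary node and is passed twice to
# _CreateBlockDev; judging from the later calls in this file, which document
# the fifth argument as force_create and pass a separate final boolean, the
# effect is that devices are only forcibly created and opened on the primary.
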
def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    if result.failed or not result.data:
      logging.error("Could not remove directory '%s'", file_storage_dir)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of the disk template and disk sizes
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


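# Worked example for _ComputeDiskSize above (illustrative, not part of the
# original source): a DRBD8 instance with disks of 10240 MB and 2048 MB needs
# (10240 + 128) + (2048 + 128) = 12544 MB free in the volume group, i.e. each
# disk pays an extra 128 MB for DRBD metadata; DT_DISKLESS and DT_FILE return
# None since they allocate nothing from the volume group.
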
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    msg = info.RemoteFailMsg()
    if msg:
      raise errors.OpPrereqError("Hypervisor parameter validation"
                                 " failed on node %s: %s" % (node, msg))


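# Illustrative sketch (added, not part of the original source): the "disks"
# and "nics" opcode parameters validated by LUCreateInstance.ExpandNames are
# lists of dicts, along the lines of
#   disks=[{"size": 10240, "mode": constants.DISK_RDWR}]
#   nics=[{"mac": constants.VALUE_AUTO, "ip": None, "bridge": "xen-br0"}]
# where missing values are filled from the cluster-level defaults and invalid
# ones rejected, as implemented below.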
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to None if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_PreBuildNICHooksList(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        exp_list = self.rpc.call_export_list(
          self.acquired_locks[locking.LEVEL_NODE])
        found = False
        for node in exp_list:
          if not exp_list[node].failed and src_path in exp_list[node].data:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise()
      if not result.data:
        raise errors.OpPrereqError("No export found in dir %s" % src_path)

      export_info = result.data
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise()
        info = info.data
        if not info:
          raise errors.OpPrereqError("Cannot get current information"
                                     " from node '%s'" % node)
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > info['vg_free']:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, info['vg_free'], req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise()
    if not isinstance(result.data, objects.OS):
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                 " primary node" % self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        msg = result.RemoteFailMsg()
        if msg:
          raise errors.OpExecError("Could not add os for instance %s"
                                   " on node %s: %s" %
                                   (instance, pnode_name, msg))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        import_result.Raise()
        for idx, result in enumerate(import_result.data):
          if not result:
            self.LogWarning("Could not import the image %s for instance"
                            " %s, disk %d, on node %s" %
                            (src_images[idx], instance, idx, pnode_name))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise()

    if instance.name not in node_insts.data:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

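  # Clarifying note (added, not in the original source): CheckArguments below
  # requires exactly one of remote_node/iallocator when mode is
  # REPLACE_DISK_CHG (changing the secondary), and neither of them for the
  # primary-/secondary-only replacement modes.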
  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    # check for valid parameter combination
    cnt = [self.op.remote_node, self.op.iallocator].count(None)
    if self.op.mode == constants.REPLACE_DISK_CHG:
      if cnt == 2:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")
      elif cnt == 0:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
    else: # not replacing the secondary
      if cnt != 2:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " be used only when changing the"
                                   " secondary node")

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)
      self.op.remote_node = remote_node
      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def _RunAllocator(self):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(self,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.op.instance_name,
                     relocate_from=[self.sec_node])

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.remote_node = ial.nodes[0]
    self.LogInfo("Selected new secondary for the instance: %s",
                 self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    self.instance = instance

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes))

    self.sec_node = instance.secondary_nodes[0]

    if self.op.iallocator is not None:
      self._RunAllocator()

    remote_node = self.op.remote_node
    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None
    if remote_node == instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")
    elif remote_node == self.sec_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.op.mode == constants.REPLACE_DISK_PRI:
      n1 = self.tgt_node = instance.primary_node
      n2 = self.oth_node = self.sec_node
    elif self.op.mode == constants.REPLACE_DISK_SEC:
      n1 = self.tgt_node = self.sec_node
      n2 = self.oth_node = instance.primary_node
    elif self.op.mode == constants.REPLACE_DISK_CHG:
      n1 = self.new_node = remote_node
      n2 = self.oth_node = instance.primary_node
      self.tgt_node = self.sec_node
      _CheckNodeNotDrained(self, remote_node)
    else:
      raise errors.ProgrammerError("Unhandled disk replace mode")

    _CheckNodeOnline(self, n1)
    _CheckNodeOnline(self, n2)

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))

    for disk_idx in self.op.disks:
      instance.FindDisk(disk_idx)

  def _ExecD8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for drbd8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    iv_names = {}
    vgname = self.cfg.GetVGName()
    # start of work
    cfg = self.cfg
    tgt_node = self.tgt_node
    oth_node = self.oth_node

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = self.rpc.call_vg_list([oth_node, tgt_node])
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")
    for node in oth_node, tgt_node:
      res = results[node]
      if res.failed or not res.data or my_vg not in res.data:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      for node in tgt_node, oth_node:
        info("checking disk/%d on %s" % (idx, node))
        cfg.SetDiskID(dev, node)
        result = self.rpc.call_blockdev_find(node, dev)
        msg = result.RemoteFailMsg()
        if not msg and not result.payload:
          msg = "disk not found"
        if msg:
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d consistency on %s" % (idx, oth_node))
      if not _CheckDiskConsistency(self, dev, oth_node,
                                   oth_node == instance.primary_node):
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
                                 " to replace disks on this node (%s)" %
                                 (oth_node, tgt_node))

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      size = dev.size
      cfg.SetDiskID(dev, tgt_node)
      lv_names = [".disk%d_%s" % (idx, suf)
                  for suf in ["data", "meta"]]
      names = _GenerateUniqueNames(self, lv_names)
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))
      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
      info("creating new local storage on %s for %s" %
           (tgt_node, dev.iv_name))
      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
                        _GetInstanceInfoText(instance), False)

    # Step: for each lv, detach+rename*2+attach
    self.proc.LogStep(4, steps_total, "change drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      info("detaching %s drbd from local storage" % dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Can't detach drbd from local storage on node"
                                 " %s for device %s: %s" %
                                 (tgt_node, dev.iv_name, msg))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)
      # build the rename list based on what LVs exist on the node
      rlist = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
        if not result.RemoteFailMsg() and result.payload:
          # device exists
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))

      info("renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Can't rename old LVs on node %s: %s" %
                                 (tgt_node, msg))
      # now we rename the new LVs to the old LVs
      info("renaming the new LVs on the target node")
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Can't rename new LVs on node %s: %s" %
                                 (tgt_node, msg))

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        cfg.SetDiskID(new, tgt_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        cfg.SetDiskID(disk, tgt_node)

      # now that the new lvs have the old name, we can add them to the device
      info("adding new mirror component on %s" % tgt_node)
      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
      msg = result.RemoteFailMsg()
      if msg:
        for new_lv in new_lvs:
          msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
          if msg:
            warning("Can't rollback device %s: %s", dev, msg,
                    hint="cleanup manually the unused logical volumes")
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs
      cfg.Update(instance)

    # Step: wait for sync

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(self, instance, unlock=True)

    # so check manually all the devices
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      cfg.SetDiskID(dev, instance.primary_node)
      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
      msg = result.RemoteFailMsg()
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))
      if result.payload[5]:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

    # Step: remove old storage
    self.proc.LogStep(6, steps_total, "removing old storage")
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      info("remove logical volumes for %s" % name)
      for lv in old_lvs:
        cfg.SetDiskID(lv, tgt_node)
        msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
        if msg:
          warning("Can't remove old LV: %s" % msg,
                  hint="manually remove unused LVs")
          continue

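  # Clarifying note (added, not in the original source): for DRBD8 disks the
  # logical_id manipulated below is the 6-tuple
  #   (node_A, node_B, port, minor_A, minor_B, secret)
  # _ExecD8Secondary first creates the device on the new secondary with
  # port=None (no networking), and only later switches the disks to the fully
  # networked id before calling call_drbd_attach_net.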
  def _ExecD8Secondary(self, feedback_fn):
    """Replace the secondary node for drbd8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    iv_names = {}
    # start of work
    cfg = self.cfg
    old_node = self.tgt_node
    new_node = self.new_node
    pri_node = instance.primary_node
    nodes_ip = {
      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
      }

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = self.rpc.call_vg_list([pri_node, new_node])
    for node in pri_node, new_node:
      res = results[node]
      if res.failed or not res.data or my_vg not in res.data:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d on %s" % (idx, pri_node))
      cfg.SetDiskID(dev, pri_node)
      result = self.rpc.call_blockdev_find(pri_node, dev)
      msg = result.RemoteFailMsg()
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                 (idx, pri_node, msg))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d consistency on %s" % (idx, pri_node))
      if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
                                 " unsafe to replace the secondary" %
                                 pri_node)

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for idx, dev in enumerate(instance.disks):
      info("adding new local storage on %s for disk/%d" %
           (new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self, new_node, instance, new_lv, True,
                        _GetInstanceInfoText(instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks],
                                   instance.name)
    logging.debug("Allocated minors %s", minors)
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
      size = dev.size
      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if pri_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children)
      try:
        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
                              _GetInstanceInfoText(instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    for idx, dev in enumerate(instance.disks):
      # we have new devices, shutdown the drbd on the old secondary
      info("shutting down drbd for disk/%d on old node" % idx)
      cfg.SetDiskID(dev, old_node)
      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
      if msg:
        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
                (idx, msg),
                hint="Please cleanup this device manually as soon as possible")

    info("detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
                                               instance.disks)[pri_node]

    msg = result.RemoteFailMsg()
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    info("updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      cfg.SetDiskID(dev, pri_node)
    cfg.Update(instance)

    # and now perform the drbd attach
    info("attaching primary drbds to new secondary (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
                                           instance.disks, instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.RemoteFailMsg()
      if msg:
        warning("can't attach drbd disks on node %s: %s", to_node, msg,
                hint="please do a gnt-instance info to see the"
                " status of disks")

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(self, instance, unlock=True)

    # so check manually all the devices
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
      cfg.SetDiskID(dev, pri_node)
      result = self.rpc.call_blockdev_find(pri_node, dev)
      msg = result.RemoteFailMsg()
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
                                 (idx, msg))
      if result.payload[5]:
        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)

    self.proc.LogStep(6, steps_total, "removing old storage")
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
      info("remove logical volumes for disk/%d" % idx)
      for lv in old_lvs:
        cfg.SetDiskID(lv, old_node)
        msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
        if msg:
          warning("Can't remove LV on old secondary: %s", msg,
                  hint="Cleanup stale volumes by hand")

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    instance = self.instance

    # Activate the instance disks if we're replacing them on a down instance
    if not instance.admin_up:
      _StartInstanceDisks(self, instance, True)

    if self.op.mode == constants.REPLACE_DISK_CHG:
      fn = self._ExecD8Secondary
    else:
      fn = self._ExecD8DiskOnly

    ret = fn(feedback_fn)

    # Deactivate the instance disks if we're replacing them on a down instance
    if not instance.admin_up:
      _SafeShutdownInstanceDisks(self, instance)

    return ret


class LUGrowDisk(LogicalUnit):
5593
  """Grow a disk of an instance.
5594

5595
  """
5596
  HPATH = "disk-grow"
5597
  HTYPE = constants.HTYPE_INSTANCE
5598
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
5599
  REQ_BGL = False
5600
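  # The required op fields, per _OP_REQP above, e.g. (hypothetical values):
  #   instance_name="inst1.example.com", disk=0, amount=1024 (MiB),
  #   wait_for_sync=True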

    
5601
  def ExpandNames(self):
5602
    self._ExpandAndLockInstance()
5603
    self.needed_locks[locking.LEVEL_NODE] = []
5604
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5605

    
5606
  def DeclareLocks(self, level):
5607
    if level == locking.LEVEL_NODE:
5608
      self._LockInstancesNodes()
5609

    
5610
  def BuildHooksEnv(self):
5611
    """Build hooks env.
5612

5613
    This runs on the master, the primary and all the secondaries.
5614

5615
    """
5616
    env = {
5617
      "DISK": self.op.disk,
5618
      "AMOUNT": self.op.amount,
5619
      }
5620
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5621
    nl = [
5622
      self.cfg.GetMasterNode(),
5623
      self.instance.primary_node,
5624
      ]
5625
    return env, nl, nl
5626

    
5627
  def CheckPrereq(self):
5628
    """Check prerequisites.
5629

5630
    This checks that the instance is in the cluster.
5631

5632
    """
5633
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5634
    assert instance is not None, \
5635
      "Cannot retrieve locked instance %s" % self.op.instance_name
5636
    nodenames = list(instance.all_nodes)
5637
    for node in nodenames:
5638
      _CheckNodeOnline(self, node)
5639

    
5640

    
5641
    self.instance = instance
5642

    
5643
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
5644
      raise errors.OpPrereqError("Instance's disk layout does not support"
5645
                                 " growing.")
5646

    
5647
    self.disk = instance.FindDisk(self.op.disk)
5648

    
5649
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5650
                                       instance.hypervisor)
5651
    for node in nodenames:
5652
      info = nodeinfo[node]
5653
      if info.failed or not info.data:
5654
        raise errors.OpPrereqError("Cannot get current information"
5655
                                   " from node '%s'" % node)
5656
      vg_free = info.data.get('vg_free', None)
5657
      if not isinstance(vg_free, int):
5658
        raise errors.OpPrereqError("Can't compute free disk space on"
5659
                                   " node %s" % node)
5660
      if self.op.amount > vg_free:
5661
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
5662
                                   " %d MiB available, %d MiB required" %
5663
                                   (node, vg_free, self.op.amount))
5664

    
5665
  def Exec(self, feedback_fn):
5666
    """Execute disk grow.
5667

5668
    """
5669
    instance = self.instance
5670
    disk = self.disk
5671
    for node in instance.all_nodes:
5672
      self.cfg.SetDiskID(disk, node)
5673
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
5674
      msg = result.RemoteFailMsg()
5675
      if msg:
5676
        raise errors.OpExecError("Grow request failed to node %s: %s" %
5677
                                 (node, msg))
5678
    disk.RecordGrow(self.op.amount)
5679
    self.cfg.Update(instance)
5680
    if self.op.wait_for_sync:
5681
      disk_abort = not _WaitForSync(self, instance)
5682
      if disk_abort:
5683
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
5684
                             " status.\nPlease check the instance.")
5685

    
5686

    
5687
class LUQueryInstanceData(NoHooksLU):
5688
  """Query runtime instance data.
5689

5690
  """
5691
  _OP_REQP = ["instances", "static"]
5692
  REQ_BGL = False
5693

    
5694
  def ExpandNames(self):
5695
    self.needed_locks = {}
5696
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
5697

    
5698
    if not isinstance(self.op.instances, list):
5699
      raise errors.OpPrereqError("Invalid argument type 'instances'")
5700

    
5701
    if self.op.instances:
5702
      self.wanted_names = []
5703
      for name in self.op.instances:
5704
        full_name = self.cfg.ExpandInstanceName(name)
5705
        if full_name is None:
5706
          raise errors.OpPrereqError("Instance '%s' not known" % name)
5707
        self.wanted_names.append(full_name)
5708
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
5709
    else:
5710
      self.wanted_names = None
5711
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5712

    
5713
    self.needed_locks[locking.LEVEL_NODE] = []
5714
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5715

    
5716
  def DeclareLocks(self, level):
5717
    if level == locking.LEVEL_NODE:
5718
      self._LockInstancesNodes()
5719

    
5720
  def CheckPrereq(self):
5721
    """Check prerequisites.
5722

5723
    This only checks the optional instance list against the existing names.
5724

5725
    """
5726
    if self.wanted_names is None:
5727
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5728

    
5729
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
5730
                             in self.wanted_names]
5731
    return
5732

    
5733
  def _ComputeDiskStatus(self, instance, snode, dev):
5734
    """Compute block device status.
5735

5736
    """
5737
    static = self.op.static
5738
    if not static:
5739
      self.cfg.SetDiskID(dev, instance.primary_node)
5740
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
5741
      if dev_pstatus.offline:
5742
        dev_pstatus = None
5743
      else:
5744
        msg = dev_pstatus.RemoteFailMsg()
5745
        if msg:
5746
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5747
                                   (instance.name, msg))
5748
        dev_pstatus = dev_pstatus.payload
5749
    else:
5750
      dev_pstatus = None
5751

    
5752
    if dev.dev_type in constants.LDS_DRBD:
5753
      # we change the snode then (otherwise we use the one passed in)
5754
      if dev.logical_id[0] == instance.primary_node:
5755
        snode = dev.logical_id[1]
5756
      else:
5757
        snode = dev.logical_id[0]
5758

    
5759
    if snode and not static:
5760
      self.cfg.SetDiskID(dev, snode)
5761
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
5762
      if dev_sstatus.offline:
5763
        dev_sstatus = None
5764
      else:
5765
        msg = dev_sstatus.RemoteFailMsg()
5766
        if msg:
5767
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5768
                                   (instance.name, msg))
5769
        dev_sstatus = dev_sstatus.payload
5770
    else:
5771
      dev_sstatus = None
5772

    
5773
    if dev.children:
5774
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
5775
                      for child in dev.children]
5776
    else:
5777
      dev_children = []
5778

    
5779
    data = {
5780
      "iv_name": dev.iv_name,
5781
      "dev_type": dev.dev_type,
5782
      "logical_id": dev.logical_id,
5783
      "physical_id": dev.physical_id,
5784
      "pstatus": dev_pstatus,
5785
      "sstatus": dev_sstatus,
5786
      "children": dev_children,
5787
      "mode": dev.mode,
5788
      }
5789

    
5790
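    # A sketch of one returned entry (hypothetical values):
    #   {"iv_name": "disk/0", "dev_type": constants.LD_DRBD8,
    #    "logical_id": dev.logical_id, "physical_id": dev.physical_id,
    #    "pstatus": <blockdev_find payload or None>,
    #    "sstatus": <blockdev_find payload or None>,
    #    "children": [...], "mode": "rw"}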
    return data
5791

    
5792
  def Exec(self, feedback_fn):
5793
    """Gather and return data"""
5794
    result = {}
5795

    
5796
    cluster = self.cfg.GetClusterInfo()
5797

    
5798
    for instance in self.wanted_instances:
5799
      if not self.op.static:
5800
        remote_info = self.rpc.call_instance_info(instance.primary_node,
5801
                                                  instance.name,
5802
                                                  instance.hypervisor)
5803
        remote_info.Raise()
5804
        remote_info = remote_info.data
5805
        if remote_info and "state" in remote_info:
5806
          remote_state = "up"
5807
        else:
5808
          remote_state = "down"
5809
      else:
5810
        remote_state = None
5811
      if instance.admin_up:
5812
        config_state = "up"
5813
      else:
5814
        config_state = "down"
5815

    
5816
      disks = [self._ComputeDiskStatus(instance, None, device)
5817
               for device in instance.disks]
5818

    
5819
      idict = {
5820
        "name": instance.name,
5821
        "config_state": config_state,
5822
        "run_state": remote_state,
5823
        "pnode": instance.primary_node,
5824
        "snodes": instance.secondary_nodes,
5825
        "os": instance.os,
5826
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
5827
        "disks": disks,
5828
        "hypervisor": instance.hypervisor,
5829
        "network_port": instance.network_port,
5830
        "hv_instance": instance.hvparams,
5831
        "hv_actual": cluster.FillHV(instance),
5832
        "be_instance": instance.beparams,
5833
        "be_actual": cluster.FillBE(instance),
5834
        }
5835

    
5836
      result[instance.name] = idict
5837

    
5838
    return result
5839

    
5840

    
5841
class LUSetInstanceParams(LogicalUnit):
5842
  """Modifies an instances's parameters.
5843

5844
  """
5845
  HPATH = "instance-modify"
5846
  HTYPE = constants.HTYPE_INSTANCE
5847
  _OP_REQP = ["instance_name"]
5848
  REQ_BGL = False
5849

    
5850
  def CheckArguments(self):
5851
    if not hasattr(self.op, 'nics'):
5852
      self.op.nics = []
5853
    if not hasattr(self.op, 'disks'):
5854
      self.op.disks = []
5855
    if not hasattr(self.op, 'beparams'):
5856
      self.op.beparams = {}
5857
    if not hasattr(self.op, 'hvparams'):
5858
      self.op.hvparams = {}
5859
    self.op.force = getattr(self.op, "force", False)
5860
    if not (self.op.nics or self.op.disks or
5861
            self.op.hvparams or self.op.beparams):
5862
      raise errors.OpPrereqError("No changes submitted")
5863

    
5864
    # Disk validation
5865
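    # self.op.disks is expected to be a list of (operation, parameters)
    # pairs, where the operation is constants.DDM_ADD, constants.DDM_REMOVE
    # or an integer disk index, e.g. (hypothetical values):
    #   [(constants.DDM_ADD, {"size": 1024, "mode": "rw"})]
    #   [(0, {"mode": "ro"})]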
    disk_addremove = 0
5866
    for disk_op, disk_dict in self.op.disks:
5867
      if disk_op == constants.DDM_REMOVE:
5868
        disk_addremove += 1
5869
        continue
5870
      elif disk_op == constants.DDM_ADD:
5871
        disk_addremove += 1
5872
      else:
5873
        if not isinstance(disk_op, int):
5874
          raise errors.OpPrereqError("Invalid disk index")
5875
      if disk_op == constants.DDM_ADD:
5876
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
5877
        if mode not in constants.DISK_ACCESS_SET:
5878
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
5879
        size = disk_dict.get('size', None)
5880
        if size is None:
5881
          raise errors.OpPrereqError("Required disk parameter size missing")
5882
        try:
5883
          size = int(size)
5884
        except ValueError, err:
5885
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
5886
                                     str(err))
5887
        disk_dict['size'] = size
5888
      else:
5889
        # modification of disk
5890
        if 'size' in disk_dict:
5891
          raise errors.OpPrereqError("Disk size change not possible, use"
5892
                                     " grow-disk")
5893

    
5894
    if disk_addremove > 1:
5895
      raise errors.OpPrereqError("Only one disk add or remove operation"
5896
                                 " supported at a time")
5897

    
5898
    # NIC validation
5899
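    # self.op.nics follows the same (operation, parameters) shape, e.g.
    # (hypothetical values): [(constants.DDM_ADD, {"mac": constants.VALUE_AUTO})]
    # to add a NIC, or [(0, {"ip": "192.0.2.10"})] to modify the first one.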
    nic_addremove = 0
5900
    for nic_op, nic_dict in self.op.nics:
5901
      if nic_op == constants.DDM_REMOVE:
5902
        nic_addremove += 1
5903
        continue
5904
      elif nic_op == constants.DDM_ADD:
5905
        nic_addremove += 1
5906
      else:
5907
        if not isinstance(nic_op, int):
5908
          raise errors.OpPrereqError("Invalid nic index")
5909

    
5910
      # nic_dict should be a dict
5911
      nic_ip = nic_dict.get('ip', None)
5912
      if nic_ip is not None:
5913
        if nic_ip.lower() == constants.VALUE_NONE:
5914
          nic_dict['ip'] = None
5915
        else:
5916
          if not utils.IsValidIP(nic_ip):
5917
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
5918

    
5919
      nic_bridge = nic_dict.get('bridge', None)
5920
      nic_link = nic_dict.get('link', None)
5921
      if nic_bridge and nic_link:
5922
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link' at the same time")
5923
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
5924
        nic_dict['bridge'] = None
5925
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
5926
        nic_dict['link'] = None
5927

    
5928
      if nic_op == constants.DDM_ADD:
5929
        nic_mac = nic_dict.get('mac', None)
5930
        if nic_mac is None:
5931
          nic_dict['mac'] = constants.VALUE_AUTO
5932

    
5933
      if 'mac' in nic_dict:
5934
        nic_mac = nic_dict['mac']
5935
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5936
          if not utils.IsValidMac(nic_mac):
5937
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
5938
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
5939
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
5940
                                     " modifying an existing nic")
5941

    
5942
    if nic_addremove > 1:
5943
      raise errors.OpPrereqError("Only one NIC add or remove operation"
5944
                                 " supported at a time")
5945

    
5946
  def ExpandNames(self):
5947
    self._ExpandAndLockInstance()
5948
    self.needed_locks[locking.LEVEL_NODE] = []
5949
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5950

    
5951
  def DeclareLocks(self, level):
5952
    if level == locking.LEVEL_NODE:
5953
      self._LockInstancesNodes()
5954

    
5955
  def BuildHooksEnv(self):
5956
    """Build hooks env.
5957

5958
    This runs on the master, primary and secondaries.
5959

5960
    """
5961
    args = dict()
5962
    if constants.BE_MEMORY in self.be_new:
5963
      args['memory'] = self.be_new[constants.BE_MEMORY]
5964
    if constants.BE_VCPUS in self.be_new:
5965
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
5966
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
5967
    # information at all.
5968
    if self.op.nics:
5969
      args['nics'] = []
5970
      nic_override = dict(self.op.nics)
5971
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
5972
      for idx, nic in enumerate(self.instance.nics):
5973
        if idx in nic_override:
5974
          this_nic_override = nic_override[idx]
5975
        else:
5976
          this_nic_override = {}
5977
        if 'ip' in this_nic_override:
5978
          ip = this_nic_override['ip']
5979
        else:
5980
          ip = nic.ip
5981
        if 'mac' in this_nic_override:
5982
          mac = this_nic_override['mac']
5983
        else:
5984
          mac = nic.mac
5985
        if idx in self.nic_pnew:
5986
          nicparams = self.nic_pnew[idx]
5987
        else:
5988
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
5989
        mode = nicparams[constants.NIC_MODE]
5990
        link = nicparams[constants.NIC_LINK]
5991
        args['nics'].append((ip, mac, mode, link))
5992
      if constants.DDM_ADD in nic_override:
5993
        ip = nic_override[constants.DDM_ADD].get('ip', None)
5994
        mac = nic_override[constants.DDM_ADD]['mac']
5995
        nicparams = self.nic_pnew[constants.DDM_ADD]
5996
        mode = nicparams[constants.NIC_MODE]
5997
        link = nicparams[constants.NIC_LINK]
5998
        args['nics'].append((ip, mac, mode, link))
5999
      elif constants.DDM_REMOVE in nic_override:
6000
        del args['nics'][-1]
6001

    
6002
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6003
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6004
    return env, nl, nl
6005

    
6006
  def _GetUpdatedParams(self, old_params, update_dict,
6007
                        default_values, parameter_types):
6008
    """Return the new params dict for the given params.
6009

6010
    @type old_params: dict
6011
    @param old_params: old parameters
6012
    @type update_dict: dict
6013
    @param update_dict: dict containing new parameter values,
6014
                       or constants.VALUE_DEFAULT to reset the
6015
                       parameter to its default value
6016
    @type default_values: dict
6017
    @param default_values: default values for the filled parameters
6018
    @type parameter_types: dict
6019
    @param parameter_types: dict mapping target dict keys to types
6020
                            in constants.ENFORCEABLE_TYPES
6021
    @rtype: (dict, dict)
6022
    @return: (new_parameters, filled_parameters)
6023

6024
    """
6025
    params_copy = copy.deepcopy(old_params)
6026
    for key, val in update_dict.iteritems():
6027
      if val == constants.VALUE_DEFAULT:
6028
        try:
6029
          del params_copy[key]
6030
        except KeyError:
6031
          pass
6032
      else:
6033
        params_copy[key] = val
6034
    utils.ForceDictType(params_copy, parameter_types)
6035
    params_filled = objects.FillDict(default_values, params_copy)
6036
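    # Worked example (hypothetical values), assuming FillDict overlays the
    # second dict on top of the first:
    #   old_params     = {"memory": 512}
    #   update_dict    = {"memory": constants.VALUE_DEFAULT, "vcpus": 2}
    #   default_values = {"memory": 128, "vcpus": 1}
    # yields params_copy = {"vcpus": 2} and
    #        params_filled = {"memory": 128, "vcpus": 2}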
    return (params_copy, params_filled)
6037

    
6038
  def CheckPrereq(self):
6039
    """Check prerequisites.
6040

6041
    This only checks the instance list against the existing names.
6042

6043
    """
6044
    force = self.force = self.op.force
6045

    
6046
    # checking the new params on the primary/secondary nodes
6047

    
6048
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6049
    cluster = self.cluster = self.cfg.GetClusterInfo()
6050
    assert self.instance is not None, \
6051
      "Cannot retrieve locked instance %s" % self.op.instance_name
6052
    pnode = instance.primary_node
6053
    nodelist = list(instance.all_nodes)
6054

    
6055
    # hvparams processing
6056
    if self.op.hvparams:
6057
      i_hvdict, hv_new = self._GetUpdatedParams(
6058
                             instance.hvparams, self.op.hvparams,
6059
                             cluster.hvparams[instance.hypervisor],
6060
                             constants.HVS_PARAMETER_TYPES)
6061
      # local check
6062
      hypervisor.GetHypervisor(
6063
        instance.hypervisor).CheckParameterSyntax(hv_new)
6064
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6065
      self.hv_new = hv_new # the new actual values
6066
      self.hv_inst = i_hvdict # the new dict (without defaults)
6067
    else:
6068
      self.hv_new = self.hv_inst = {}
6069

    
6070
    # beparams processing
6071
    if self.op.beparams:
6072
      i_bedict, be_new = self._GetUpdatedParams(
6073
                             instance.beparams, self.op.beparams,
6074
                             cluster.beparams[constants.PP_DEFAULT],
6075
                             constants.BES_PARAMETER_TYPES)
6076
      self.be_new = be_new # the new actual values
6077
      self.be_inst = i_bedict # the new dict (without defaults)
6078
    else:
6079
      self.be_new = self.be_inst = {}
6080

    
6081
    self.warn = []
6082

    
6083
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6084
      mem_check_list = [pnode]
6085
      if be_new[constants.BE_AUTO_BALANCE]:
6086
        # either we changed auto_balance to yes or it was from before
6087
        mem_check_list.extend(instance.secondary_nodes)
6088
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6089
                                                  instance.hypervisor)
6090
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6091
                                         instance.hypervisor)
6092
      if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
6093
        # Assume the primary node is unreachable and go ahead
6094
        self.warn.append("Can't get info from primary node %s" % pnode)
6095
      else:
6096
        if not instance_info.failed and instance_info.data:
6097
          current_mem = int(instance_info.data['memory'])
6098
        else:
6099
          # Assume instance not running
6100
          # (there is a slight race condition here, but it's not very probable,
6101
          # and we have no other way to check)
6102
          current_mem = 0
6103
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6104
                    nodeinfo[pnode].data['memory_free'])
6105
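        # Hypothetical example: raising BE_MEMORY to 2048 while the instance
        # currently uses 512 and the node reports 1024 MB free gives
        # miss_mem = 2048 - 512 - 1024 = 512, so the change is refused.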
        if miss_mem > 0:
6106
          raise errors.OpPrereqError("This change will prevent the instance"
6107
                                     " from starting, due to %d MB of memory"
6108
                                     " missing on its primary node" % miss_mem)
6109

    
6110
      if be_new[constants.BE_AUTO_BALANCE]:
6111
        for node, nres in nodeinfo.iteritems():
6112
          if node not in instance.secondary_nodes:
6113
            continue
6114
          if nres.failed or not isinstance(nres.data, dict):
6115
            self.warn.append("Can't get info from secondary node %s" % node)
6116
          elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
6117
            self.warn.append("Not enough memory to failover instance to"
6118
                             " secondary node %s" % node)
6119

    
6120
    # NIC processing
6121
    self.nic_pnew = {}
6122
    self.nic_pinst = {}
6123
    for nic_op, nic_dict in self.op.nics:
6124
      if nic_op == constants.DDM_REMOVE:
6125
        if not instance.nics:
6126
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6127
        continue
6128
      if nic_op != constants.DDM_ADD:
6129
        # an existing nic
6130
        if nic_op < 0 or nic_op >= len(instance.nics):
6131
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6132
                                     " are 0 to %d" %
6133
                                     (nic_op, len(instance.nics)))
6134
        old_nic_params = instance.nics[nic_op].nicparams
6135
        old_nic_ip = instance.nics[nic_op].ip
6136
      else:
6137
        old_nic_params = {}
6138
        old_nic_ip = None
6139

    
6140
      update_params_dict = dict([(key, nic_dict[key])
6141
                                 for key in constants.NICS_PARAMETERS
6142
                                 if key in nic_dict])
6143

    
6144
      if 'bridge' in nic_dict:
6145
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6146

    
6147
      new_nic_params, new_filled_nic_params = \
6148
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6149
                                 cluster.nicparams[constants.PP_DEFAULT],
6150
                                 constants.NICS_PARAMETER_TYPES)
6151
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6152
      self.nic_pinst[nic_op] = new_nic_params
6153
      self.nic_pnew[nic_op] = new_filled_nic_params
6154
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6155

    
6156
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6157
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6158
        result = self.rpc.call_bridges_exist(pnode, [nic_bridge])
6159
        result.Raise()
6160
        if not result.data:
6161
          msg = ("Bridge '%s' doesn't exist on one of"
6162
                 " the instance nodes" % nic_bridge)
6163
          if self.force:
6164
            self.warn.append(msg)
6165
          else:
6166
            raise errors.OpPrereqError(msg)
6167
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6168
        if 'ip' in nic_dict:
6169
          nic_ip = nic_dict['ip']
6170
        else:
6171
          nic_ip = old_nic_ip
6172
        if nic_ip is None:
6173
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6174
                                     ' on a routed nic')
6175
      if 'mac' in nic_dict:
6176
        nic_mac = nic_dict['mac']
6177
        if nic_mac is None:
6178
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6179
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6180
          # otherwise generate the mac
6181
          nic_dict['mac'] = self.cfg.GenerateMAC()
6182
        else:
6183
          # or validate/reserve the current one
6184
          if self.cfg.IsMacInUse(nic_mac):
6185
            raise errors.OpPrereqError("MAC address %s already in use"
6186
                                       " in cluster" % nic_mac)
6187

    
6188
    # DISK processing
6189
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6190
      raise errors.OpPrereqError("Disk operations not supported for"
6191
                                 " diskless instances")
6192
    for disk_op, disk_dict in self.op.disks:
6193
      if disk_op == constants.DDM_REMOVE:
6194
        if len(instance.disks) == 1:
6195
          raise errors.OpPrereqError("Cannot remove the last disk of"
6196
                                     " an instance")
6197
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6198
        ins_l = ins_l[pnode]
6199
        if ins_l.failed or not isinstance(ins_l.data, list):
6200
          raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
6201
        if instance.name in ins_l.data:
6202
          raise errors.OpPrereqError("Instance is running, can't remove"
6203
                                     " disks.")
6204

    
6205
      if (disk_op == constants.DDM_ADD and
6206
          len(instance.disks) >= constants.MAX_DISKS):
6207
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6208
                                   " add more" % constants.MAX_DISKS)
6209
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6210
        # an existing disk
6211
        if disk_op < 0 or disk_op >= len(instance.disks):
6212
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6213
                                     " are 0 to %d" %
6214
                                     (disk_op, len(instance.disks)))
6215

    
6216
    return
6217

    
6218
  def Exec(self, feedback_fn):
6219
    """Modifies an instance.
6220

6221
    All parameters take effect only at the next restart of the instance.
6222

6223
    """
6224
    # Process here the warnings from CheckPrereq, as we don't have a
6225
    # feedback_fn there.
6226
    for warn in self.warn:
6227
      feedback_fn("WARNING: %s" % warn)
6228

    
6229
    result = []
6230
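    # 'result' collects (parameter, new value) pairs describing every change
    # applied, e.g. (hypothetical): ("disk/1", "add:size=1024,mode=rw") or
    # ("be/memory", 512); it is returned to the caller at the end.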
    instance = self.instance
6231
    cluster = self.cluster
6232
    # disk changes
6233
    for disk_op, disk_dict in self.op.disks:
6234
      if disk_op == constants.DDM_REMOVE:
6235
        # remove the last disk
6236
        device = instance.disks.pop()
6237
        device_idx = len(instance.disks)
6238
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6239
          self.cfg.SetDiskID(disk, node)
6240
          msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
6241
          if msg:
6242
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6243
                            " continuing anyway", device_idx, node, msg)
6244
        result.append(("disk/%d" % device_idx, "remove"))
6245
      elif disk_op == constants.DDM_ADD:
6246
        # add a new disk
6247
        if instance.disk_template == constants.DT_FILE:
6248
          file_driver, file_path = instance.disks[0].logical_id
6249
          file_path = os.path.dirname(file_path)
6250
        else:
6251
          file_driver = file_path = None
6252
        disk_idx_base = len(instance.disks)
6253
        new_disk = _GenerateDiskTemplate(self,
6254
                                         instance.disk_template,
6255
                                         instance.name, instance.primary_node,
6256
                                         instance.secondary_nodes,
6257
                                         [disk_dict],
6258
                                         file_path,
6259
                                         file_driver,
6260
                                         disk_idx_base)[0]
6261
        instance.disks.append(new_disk)
6262
        info = _GetInstanceInfoText(instance)
6263

    
6264
        logging.info("Creating volume %s for instance %s",
6265
                     new_disk.iv_name, instance.name)
6266
        # Note: this needs to be kept in sync with _CreateDisks
6267
        #HARDCODE
6268
        for node in instance.all_nodes:
6269
          f_create = node == instance.primary_node
6270
          try:
6271
            _CreateBlockDev(self, node, instance, new_disk,
6272
                            f_create, info, f_create)
6273
          except errors.OpExecError, err:
6274
            self.LogWarning("Failed to create volume %s (%s) on"
6275
                            " node %s: %s",
6276
                            new_disk.iv_name, new_disk, node, err)
6277
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6278
                       (new_disk.size, new_disk.mode)))
6279
      else:
6280
        # change a given disk
6281
        instance.disks[disk_op].mode = disk_dict['mode']
6282
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6283
    # NIC changes
6284
    for nic_op, nic_dict in self.op.nics:
6285
      if nic_op == constants.DDM_REMOVE:
6286
        # remove the last nic
6287
        del instance.nics[-1]
6288
        result.append(("nic.%d" % len(instance.nics), "remove"))
6289
      elif nic_op == constants.DDM_ADD:
6290
        # mac and bridge should be set, by now
6291
        mac = nic_dict['mac']
6292
        ip = nic_dict.get('ip', None)
6293
        nicparams = self.nic_pinst[constants.DDM_ADD]
6294
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6295
        instance.nics.append(new_nic)
6296
        result.append(("nic.%d" % (len(instance.nics) - 1),
6297
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6298
                       (new_nic.mac, new_nic.ip,
6299
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6300
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6301
                       )))
6302
      else:
6303
        for key in 'mac', 'ip':
6304
          if key in nic_dict:
6305
            setattr(instance.nics[nic_op], key, nic_dict[key])
6306
        if nic_op in self.nic_pnew:
6307
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6308
        for key, val in nic_dict.iteritems():
6309
          result.append(("nic.%s/%d" % (key, nic_op), val))
6310

    
6311
    # hvparams changes
6312
    if self.op.hvparams:
6313
      instance.hvparams = self.hv_inst
6314
      for key, val in self.op.hvparams.iteritems():
6315
        result.append(("hv/%s" % key, val))
6316

    
6317
    # beparams changes
6318
    if self.op.beparams:
6319
      instance.beparams = self.be_inst
6320
      for key, val in self.op.beparams.iteritems():
6321
        result.append(("be/%s" % key, val))
6322

    
6323
    self.cfg.Update(instance)
6324

    
6325
    return result
6326

    
6327

    
6328
class LUQueryExports(NoHooksLU):
6329
  """Query the exports list
6330

6331
  """
6332
  _OP_REQP = ['nodes']
6333
  REQ_BGL = False
6334

    
6335
  def ExpandNames(self):
6336
    self.needed_locks = {}
6337
    self.share_locks[locking.LEVEL_NODE] = 1
6338
    if not self.op.nodes:
6339
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6340
    else:
6341
      self.needed_locks[locking.LEVEL_NODE] = \
6342
        _GetWantedNodes(self, self.op.nodes)
6343

    
6344
  def CheckPrereq(self):
6345
    """Check prerequisites.
6346

6347
    """
6348
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6349

    
6350
  def Exec(self, feedback_fn):
6351
    """Compute the list of all the exported system images.
6352

6353
    @rtype: dict
6354
    @return: a dictionary with the structure node->(export-list)
6355
        where export-list is a list of the instances exported on
6356
        that node.
6357

6358
    """
6359
    rpcresult = self.rpc.call_export_list(self.nodes)
6360
    result = {}
6361
    for node in rpcresult:
6362
      if rpcresult[node].failed:
6363
        result[node] = False
6364
      else:
6365
        result[node] = rpcresult[node].data
6366

    
6367
    return result
6368

    
6369

    
6370
class LUExportInstance(LogicalUnit):
6371
  """Export an instance to an image in the cluster.
6372

6373
  """
6374
  HPATH = "instance-export"
6375
  HTYPE = constants.HTYPE_INSTANCE
6376
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6377
  REQ_BGL = False
6378

    
6379
  def ExpandNames(self):
6380
    self._ExpandAndLockInstance()
6381
    # FIXME: lock only instance primary and destination node
6382
    #
6383
    # Sad but true, for now we have to lock all nodes, as we don't know where
6384
    # the previous export might be, and in this LU we search for it and
6385
    # remove it from its current node. In the future we could fix this by:
6386
    #  - making a tasklet to search (share-lock all), then create the new one,
6387
    #    then one to remove, after
6388
    #  - removing the removal operation altogether
6389
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6390

    
6391
  def DeclareLocks(self, level):
6392
    """Last minute lock declaration."""
6393
    # All nodes are locked anyway, so nothing to do here.
6394

    
6395
  def BuildHooksEnv(self):
6396
    """Build hooks env.
6397

6398
    This will run on the master, primary node and target node.
6399

6400
    """
6401
    env = {
6402
      "EXPORT_NODE": self.op.target_node,
6403
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6404
      }
6405
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6406
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6407
          self.op.target_node]
6408
    return env, nl, nl
6409

    
6410
  def CheckPrereq(self):
6411
    """Check prerequisites.
6412

6413
    This checks that the instance and node names are valid.
6414

6415
    """
6416
    instance_name = self.op.instance_name
6417
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6418
    assert self.instance is not None, \
6419
          "Cannot retrieve locked instance %s" % self.op.instance_name
6420
    _CheckNodeOnline(self, self.instance.primary_node)
6421

    
6422
    self.dst_node = self.cfg.GetNodeInfo(
6423
      self.cfg.ExpandNodeName(self.op.target_node))
6424

    
6425
    if self.dst_node is None:
6426
      # This is wrong node name, not a non-locked node
6427
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6428
    _CheckNodeOnline(self, self.dst_node.name)
6429
    _CheckNodeNotDrained(self, self.dst_node.name)
6430

    
6431
    # instance disk type verification
6432
    for disk in self.instance.disks:
6433
      if disk.dev_type == constants.LD_FILE:
6434
        raise errors.OpPrereqError("Export not supported for instances with"
6435
                                   " file-based disks")
6436

    
6437
  def Exec(self, feedback_fn):
6438
    """Export an instance to an image in the cluster.
6439

6440
    """
6441
    instance = self.instance
6442
    dst_node = self.dst_node
6443
    src_node = instance.primary_node
6444
    if self.op.shutdown:
6445
      # shutdown the instance, but not the disks
6446
      result = self.rpc.call_instance_shutdown(src_node, instance)
6447
      msg = result.RemoteFailMsg()
6448
      if msg:
6449
        raise errors.OpExecError("Could not shutdown instance %s on"
6450
                                 " node %s: %s" %
6451
                                 (instance.name, src_node, msg))
6452

    
6453
    vgname = self.cfg.GetVGName()
6454

    
6455
    snap_disks = []
6456

    
6457
    # set the disks ID correctly since call_instance_start needs the
6458
    # correct drbd minor to create the symlinks
6459
    for disk in instance.disks:
6460
      self.cfg.SetDiskID(disk, src_node)
6461

    
6462
    try:
6463
      for disk in instance.disks:
6464
        # new_dev_name will be a snapshot of an LVM leaf of the disk we passed
6465
        new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
6466
        if new_dev_name.failed or not new_dev_name.data:
6467
          self.LogWarning("Could not snapshot block device %s on node %s",
6468
                          disk.logical_id[1], src_node)
6469
          snap_disks.append(False)
6470
        else:
6471
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6472
                                 logical_id=(vgname, new_dev_name.data),
6473
                                 physical_id=(vgname, new_dev_name.data),
6474
                                 iv_name=disk.iv_name)
6475
          snap_disks.append(new_dev)
6476

    
6477
    finally:
6478
      if self.op.shutdown and instance.admin_up:
6479
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6480
        msg = result.RemoteFailMsg()
6481
        if msg:
6482
          _ShutdownInstanceDisks(self, instance)
6483
          raise errors.OpExecError("Could not start instance: %s" % msg)
6484

    
6485
    # TODO: check for size
6486

    
6487
    cluster_name = self.cfg.GetClusterName()
6488
    for idx, dev in enumerate(snap_disks):
6489
      if dev:
6490
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6491
                                               instance, cluster_name, idx)
6492
        if result.failed or not result.data:
6493
          self.LogWarning("Could not export block device %s from node %s to"
6494
                          " node %s", dev.logical_id[1], src_node,
6495
                          dst_node.name)
6496
        msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
6497
        if msg:
6498
          self.LogWarning("Could not remove snapshot block device %s from node"
6499
                          " %s: %s", dev.logical_id[1], src_node, msg)
6500

    
6501
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6502
    if result.failed or not result.data:
6503
      self.LogWarning("Could not finalize export for instance %s on node %s",
6504
                      instance.name, dst_node.name)
6505

    
6506
    nodelist = self.cfg.GetNodeList()
6507
    nodelist.remove(dst_node.name)
6508

    
6509
    # on one-node clusters nodelist will be empty after the removal
6510
    # if we proceed the backup would be removed because OpQueryExports
6511
    # substitutes an empty list with the full cluster node list.
6512
    if nodelist:
6513
      exportlist = self.rpc.call_export_list(nodelist)
6514
      for node in exportlist:
6515
        if exportlist[node].failed:
6516
          continue
6517
        if instance.name in exportlist[node].data:
6518
          if not self.rpc.call_export_remove(node, instance.name):
6519
            self.LogWarning("Could not remove older export for instance %s"
6520
                            " on node %s", instance.name, node)
6521

    
6522

    
6523
class LURemoveExport(NoHooksLU):
6524
  """Remove exports related to the named instance.
6525

6526
  """
6527
  _OP_REQP = ["instance_name"]
6528
  REQ_BGL = False
6529

    
6530
  def ExpandNames(self):
6531
    self.needed_locks = {}
6532
    # We need all nodes to be locked in order for RemoveExport to work, but we
6533
    # don't need to lock the instance itself, as nothing will happen to it (and
6534
    # we can remove exports also for a removed instance)
6535
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6536

    
6537
  def CheckPrereq(self):
6538
    """Check prerequisites.
6539
    """
6540
    pass
6541

    
6542
  def Exec(self, feedback_fn):
6543
    """Remove any export.
6544

6545
    """
6546
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6547
    # If the instance was not found we'll try with the name that was passed in.
6548
    # This will only work if it was an FQDN, though.
6549
    fqdn_warn = False
6550
    if not instance_name:
6551
      fqdn_warn = True
6552
      instance_name = self.op.instance_name
6553

    
6554
    exportlist = self.rpc.call_export_list(self.acquired_locks[
6555
      locking.LEVEL_NODE])
6556
    found = False
6557
    for node in exportlist:
6558
      if exportlist[node].failed:
6559
        self.LogWarning("Failed to query node %s, continuing" % node)
6560
        continue
6561
      if instance_name in exportlist[node].data:
6562
        found = True
6563
        result = self.rpc.call_export_remove(node, instance_name)
6564
        if result.failed or not result.data:
6565
          logging.error("Could not remove export for instance %s"
6566
                        " on node %s", instance_name, node)
6567

    
6568
    if fqdn_warn and not found:
6569
      feedback_fn("Export not found. If trying to remove an export belonging"
6570
                  " to a deleted instance please use its Fully Qualified"
6571
                  " Domain Name.")
6572

    
6573

    
6574
class TagsLU(NoHooksLU):
6575
  """Generic tags LU.
6576

6577
  This is an abstract class which is the parent of all the other tags LUs.
6578

6579
  """
6580

    
6581
  def ExpandNames(self):
6582
    self.needed_locks = {}
6583
    if self.op.kind == constants.TAG_NODE:
6584
      name = self.cfg.ExpandNodeName(self.op.name)
6585
      if name is None:
6586
        raise errors.OpPrereqError("Invalid node name (%s)" %
6587
                                   (self.op.name,))
6588
      self.op.name = name
6589
      self.needed_locks[locking.LEVEL_NODE] = name
6590
    elif self.op.kind == constants.TAG_INSTANCE:
6591
      name = self.cfg.ExpandInstanceName(self.op.name)
6592
      if name is None:
6593
        raise errors.OpPrereqError("Invalid instance name (%s)" %
6594
                                   (self.op.name,))
6595
      self.op.name = name
6596
      self.needed_locks[locking.LEVEL_INSTANCE] = name
6597

    
6598
  def CheckPrereq(self):
6599
    """Check prerequisites.
6600

6601
    """
6602
    if self.op.kind == constants.TAG_CLUSTER:
6603
      self.target = self.cfg.GetClusterInfo()
6604
    elif self.op.kind == constants.TAG_NODE:
6605
      self.target = self.cfg.GetNodeInfo(self.op.name)
6606
    elif self.op.kind == constants.TAG_INSTANCE:
6607
      self.target = self.cfg.GetInstanceInfo(self.op.name)
6608
    else:
6609
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
6610
                                 str(self.op.kind))
6611

    
6612

    
6613
class LUGetTags(TagsLU):
6614
  """Returns the tags of a given object.
6615

6616
  """
6617
  _OP_REQP = ["kind", "name"]
6618
  REQ_BGL = False
6619

    
6620
  def Exec(self, feedback_fn):
6621
    """Returns the tag list.
6622

6623
    """
6624
    return list(self.target.GetTags())
6625

    
6626

    
6627
class LUSearchTags(NoHooksLU):
6628
  """Searches the tags for a given pattern.
6629

6630
  """
6631
  _OP_REQP = ["pattern"]
6632
  REQ_BGL = False
6633

    
6634
  def ExpandNames(self):
6635
    self.needed_locks = {}
6636

    
6637
  def CheckPrereq(self):
6638
    """Check prerequisites.
6639

6640
    This checks the pattern passed for validity by compiling it.
6641

6642
    """
6643
    try:
6644
      self.re = re.compile(self.op.pattern)
6645
    except re.error, err:
6646
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6647
                                 (self.op.pattern, err))
6648

    
6649
  def Exec(self, feedback_fn):
6650
    """Returns the tag list.
6651

6652
    """
6653
    cfg = self.cfg
6654
    tgts = [("/cluster", cfg.GetClusterInfo())]
6655
    ilist = cfg.GetAllInstancesInfo().values()
6656
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6657
    nlist = cfg.GetAllNodesInfo().values()
6658
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6659
    results = []
6660
    for path, target in tgts:
6661
      for tag in target.GetTags():
6662
        if self.re.search(tag):
6663
          results.append((path, tag))
6664
    return results
6665

    
6666

    
6667
class LUAddTags(TagsLU):
6668
  """Sets a tag on a given object.
6669

6670
  """
6671
  _OP_REQP = ["kind", "name", "tags"]
6672
  REQ_BGL = False
6673

    
6674
  def CheckPrereq(self):
6675
    """Check prerequisites.
6676

6677
    This checks the type and length of the tag name and value.
6678

6679
    """
6680
    TagsLU.CheckPrereq(self)
6681
    for tag in self.op.tags:
6682
      objects.TaggableObject.ValidateTag(tag)
6683

    
6684
  def Exec(self, feedback_fn):
6685
    """Sets the tag.
6686

6687
    """
6688
    try:
6689
      for tag in self.op.tags:
6690
        self.target.AddTag(tag)
6691
    except errors.TagError, err:
6692
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
6693
    try:
6694
      self.cfg.Update(self.target)
6695
    except errors.ConfigurationError:
6696
      raise errors.OpRetryError("There has been a modification to the"
6697
                                " config file and the operation has been"
6698
                                " aborted. Please retry.")
6699

    
6700

    
6701
class LUDelTags(TagsLU):
6702
  """Delete a list of tags from a given object.
6703

6704
  """
6705
  _OP_REQP = ["kind", "name", "tags"]
6706
  REQ_BGL = False
6707

    
6708
  def CheckPrereq(self):
6709
    """Check prerequisites.
6710

6711
    This checks that we have the given tag.
6712

6713
    """
6714
    TagsLU.CheckPrereq(self)
6715
    for tag in self.op.tags:
6716
      objects.TaggableObject.ValidateTag(tag)
6717
    del_tags = frozenset(self.op.tags)
6718
    cur_tags = self.target.GetTags()
6719
    if not del_tags <= cur_tags:
6720
      diff_tags = del_tags - cur_tags
6721
      diff_names = ["'%s'" % tag for tag in diff_tags]
6722
      diff_names.sort()
6723
      raise errors.OpPrereqError("Tag(s) %s not found" %
6724
                                 (",".join(diff_names)))
6725

    
6726
  def Exec(self, feedback_fn):
6727
    """Remove the tag from the object.
6728

6729
    """
6730
    for tag in self.op.tags:
6731
      self.target.RemoveTag(tag)
6732
    try:
6733
      self.cfg.Update(self.target)
6734
    except errors.ConfigurationError:
6735
      raise errors.OpRetryError("There has been a modification to the"
6736
                                " config file and the operation has been"
6737
                                " aborted. Please retry.")
6738

    
6739

    
6740
class LUTestDelay(NoHooksLU):
6741
  """Sleep for a specified amount of time.
6742

6743
  This LU sleeps on the master and/or nodes for a specified amount of
6744
  time.
6745

6746
  """
6747
  _OP_REQP = ["duration", "on_master", "on_nodes"]
6748
  REQ_BGL = False
6749

    
6750
  def ExpandNames(self):
6751
    """Expand names and set required locks.
6752

6753
    This expands the node list, if any.
6754

6755
    """
6756
    self.needed_locks = {}
6757
    if self.op.on_nodes:
6758
      # _GetWantedNodes can be used here, but is not always appropriate to use
6759
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
6760
      # more information.
6761
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
6762
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
6763

    
6764
  def CheckPrereq(self):
6765
    """Check prerequisites.
6766

6767
    """
6768

    
6769
  def Exec(self, feedback_fn):
6770
    """Do the actual sleep.
6771

6772
    """
6773
    if self.op.on_master:
6774
      if not utils.TestDelay(self.op.duration):
6775
        raise errors.OpExecError("Error during master delay test")
6776
    if self.op.on_nodes:
6777
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
6778
      if not result:
6779
        raise errors.OpExecError("Complete failure from rpc call")
6780
      for node, node_result in result.items():
6781
        node_result.Raise()
6782
        if not node_result.data:
6783
          raise errors.OpExecError("Failure during rpc call to node %s,"
6784
                                   " result: %s" % (node, node_result.data))
6785

    
6786

    
6787
class IAllocator(object):
6788
  """IAllocator framework.
6789

6790
  An IAllocator instance has several sets of attributes:
6791
    - cfg that is needed to query the cluster
6792
    - input data (all members of the _KEYS class attribute are required)
6793
    - four buffer attributes (in|out_data|text), that represent the
6794
      input (to the external script) in text and data structure format,
6795
      and the output from it, again in two formats
6796
    - the result variables from the script (success, info, nodes) for
6797
      easy usage
6798

6799
  """
6800
  _ALLO_KEYS = [
6801
    "mem_size", "disks", "disk_template",
6802
    "os", "tags", "nics", "vcpus", "hypervisor",
6803
    ]
6804
  _RELO_KEYS = [
6805
    "relocate_from",
6806
    ]
6807

    
6808
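  # The constructor is keyword-driven: callers pass exactly the keys listed
  # in _ALLO_KEYS (allocation) or _RELO_KEYS (relocation).  A hypothetical
  # allocation call might look roughly like:
  #   IAllocator(self, constants.IALLOCATOR_MODE_ALLOC, "inst1.example.com",
  #              mem_size=512, disks=[{"size": 1024, "mode": "rw"}],
  #              disk_template=constants.DT_DRBD8, os="debian-etch",
  #              tags=[], nics=[...], vcpus=1, hypervisor="xen-pvm")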
  def __init__(self, lu, mode, name, **kwargs):
6809
    self.lu = lu
6810
    # init buffer variables
6811
    self.in_text = self.out_text = self.in_data = self.out_data = None
6812
    # init all input fields so that pylint is happy
6813
    self.mode = mode
6814
    self.name = name
6815
    self.mem_size = self.disks = self.disk_template = None
6816
    self.os = self.tags = self.nics = self.vcpus = None
6817
    self.hypervisor = None
6818
    self.relocate_from = None
6819
    # computed fields
6820
    self.required_nodes = None
6821
    # init result fields
6822
    self.success = self.info = self.nodes = None
6823
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6824
      keyset = self._ALLO_KEYS
6825
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
6826
      keyset = self._RELO_KEYS
6827
    else:
6828
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
6829
                                   " IAllocator" % self.mode)
6830
    for key in kwargs:
6831
      if key not in keyset:
6832
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
6833
                                     " IAllocator" % key)
6834
      setattr(self, key, kwargs[key])
6835
    for key in keyset:
6836
      if key not in kwargs:
6837
        raise errors.ProgrammerError("Missing input parameter '%s' to"
6838
                                     " IAllocator" % key)
6839
    self._BuildInputData()
6840

    
6841
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.lu.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
                                           hypervisor_name)
    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise()
        if not isinstance(nresult.data, dict):
          raise errors.OpExecError("Can't get data for node %s" % nname)
        remote_info = nresult.data
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          try:
            remote_info[attr] = int(remote_info[attr])
          except ValueError, err:
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" % (nname, attr, err))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].data:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
                  for n in iinfo.nics]
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

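  # For reference, the in_data dictionary assembled above (and extended with
  # a "request" entry by the two _Add*Instance methods below) has roughly
  # the following shape; all concrete values here are purely illustrative,
  # and the dynamic node data is only present for online nodes:
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "enabled_hypervisors": ["xen-pvm"],
  #     "nodes": {
  #       "node1.example.com": {
  #         "tags": [], "primary_ip": "192.0.2.1",
  #         "secondary_ip": "192.0.2.101", "offline": False,
  #         "drained": False, "master_candidate": True,
  #         "total_memory": 4096, "reserved_memory": 512,
  #         "free_memory": 1024, "total_disk": 102400, "free_disk": 51200,
  #         "total_cpus": 4, "i_pri_memory": 2048, "i_pri_up_memory": 2048,
  #         },
  #       },
  #     "instances": {
  #       "instance1.example.com": {
  #         "tags": [], "admin_up": True, "vcpus": 1, "memory": 512,
  #         "os": "debian-etch", "hypervisor": "xen-pvm",
  #         "nodes": ["node1.example.com", "node2.example.com"],
  #         "nics": [{"mac": "aa:00:00:11:22:33", "ip": None,
  #                   "bridge": "xen-br0"}],
  #         "disks": [{"size": 10240, "mode": "w"}],
  #         "disk_template": "drbd", "disk_space_total": 10368,
  #         },
  #       },
  #     }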
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

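  # As an illustration (all values made up), the "allocate" request built
  # above for a new DRBD instance would look like:
  #
  #   data["request"] = {
  #     "type": "allocate",
  #     "name": "instance2.example.com",
  #     "disk_template": "drbd",
  #     "tags": [],
  #     "os": "debian-etch",
  #     "vcpus": 1,
  #     "memory": 512,
  #     "disks": [{"size": 10240, "mode": "w"}],
  #     "disk_space_total": 10368,
  #     "nics": [{"mac": "aa:00:00:44:55:66", "ip": None,
  #               "bridge": "xen-br0"}],
  #     "required_nodes": 2,
  #     }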
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.lu.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

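  # Illustrative example of the "relocate" request built above; the values
  # are made up, and "relocate_from" lists the node(s) the instance should
  # be moved away from:
  #
  #   self.in_data["request"] = {
  #     "type": "relocate",
  #     "name": "instance1.example.com",
  #     "disk_space_total": 10368,
  #     "required_nodes": 1,
  #     "relocate_from": ["node2.example.com"],
  #     }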
  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

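  # Note that self.in_text, the serialized form of self.in_data, is the
  # document that Run() below hands to the iallocator runner RPC.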
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.lu.rpc.call_iallocator_runner
    data = self.in_text

    result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
    result.Raise()

    if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
      raise errors.OpExecError("Invalid result from master iallocator runner")

    rcode, stdout, stderr, fail = result.data

    if rcode == constants.IARUN_NOTFOUND:
      raise errors.OpExecError("Can't find allocator '%s'" % name)
    elif rcode == constants.IARUN_FAILURE:
      raise errors.OpExecError("Instance allocator call failed: %s,"
                               " output: %s" % (fail, stdout+stderr))
    self.out_text = stdout
    if validate:
      self._ValidateResult()

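  # _ValidateResult() below expects the allocator's reply (self.out_text) to
  # deserialize into a dictionary with at least the keys "success", "info"
  # and "nodes", for instance (illustrative values only):
  #
  #   {
  #     "success": True,
  #     "info": "allocation successful",
  #     "nodes": ["node1.example.com", "node3.example.com"],
  #     }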
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and in the individual "success", "info" and "nodes"
    attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


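# A sketch of how a LU typically drives the IAllocator class; the allocator
# name "my-allocator" and the variables are placeholders, and LUTestAllocator
# below exercises the same steps end to end:
#
#   ial = IAllocator(self,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name=instance_name,
#                    relocate_from=[old_secondary])
#   ial.Run("my-allocator")
#   if not ial.success:
#     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info)
#   new_node = ial.nodes[0]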
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the requested direction it
  either only builds the allocator input (IALLOCATOR_DIR_IN) or also runs
  the named allocator and returns its output (IALLOCATOR_DIR_OUT).

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

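  # For the allocate mode checked above, well-formed "nics" and "disks"
  # opcode parameters look like this (illustrative values; the accepted keys
  # and types are exactly the ones verified in CheckPrereq):
  #
  #   nics  = [{"mac": "aa:00:00:77:88:99", "ip": None, "bridge": "xen-br0"}]
  #   disks = [{"size": 10240, "mode": "w"}]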
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result