#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)
    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
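

# The following sketch is editorial and illustrative only; it is not part of
# the original Ganeti module. Assuming a hypothetical opcode that carries an
# instance_name field, it shows how a concurrent LU is expected to use the
# locking contract described above: ExpandNames declaring self.needed_locks,
# DeclareLocks refining them via _LockInstancesNodes, then CheckPrereq and
# Exec doing the actual work.
class _ExampleNoopLU(LogicalUnit):
  """Illustrative minimal LU (editorial example, not used by Ganeti).

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # lock the instance named in the opcode, and later its primary node
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    # purely illustrative: only check that the instance still exists
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Doing nothing for %s" % self.instance.name)
    return self.instance.name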


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
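

# Editorial sketch, not part of the original module: a minimal Tasklet
# subclass following the contract above. The owning LU is expected to have
# acquired any needed locks before running the tasklet; the name
# _ExampleEchoTasklet and its constructor are hypothetical.
class _ExampleEchoTasklet(Tasklet):
  """Illustrative tasklet that only emits a feedback message.

  """
  def __init__(self, lu, message):
    self.lu = lu
    self.message = message

  def CheckPrereq(self):
    # nothing to verify for this illustrative tasklet
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Tasklet says: %s" % self.message)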


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))
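

# Editorial sketch, not part of the original module: how a query-style LU
# typically uses _CheckOutputFields. The field names below are purely
# illustrative.
def _ExampleFieldCheck(selected_fields):
  """Illustrative wrapper validating user-requested output fields.

  """
  static_fields = utils.FieldSet("name", "pnode", "snodes")
  dynamic_fields = utils.FieldSet("oper_state", "oper_ram")
  # raises errors.OpPrereqError("Unknown output fields selected: ...") if
  # selected_fields contains anything outside the two sets
  _CheckOutputFields(static=static_fields,
                     dynamic=dynamic_fields,
                     selected=selected_fields)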


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
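

# Editorial note, not part of the original module: for a hypothetical
# instance "inst1.example.com" with one bridged NIC and one 10240 MiB disk,
# the helper above would produce keys such as INSTANCE_NAME,
# INSTANCE_PRIMARY, INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC,
# INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT and INSTANCE_DISK0_SIZE, plus one
# INSTANCE_BE_* / INSTANCE_HV_* entry per backend/hypervisor parameter; per
# the BuildHooksEnv docstring, the hooks runner later adds the GANETI_
# prefix. All values below are invented for illustration.
def _ExampleInstanceHookEnv():
  """Illustrative call of _BuildInstanceHookEnv (editorial example).

  """
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                               ["node2.example.com"], "debootstrap", True,
                               512, 1, [("192.0.2.10", "aa:00:00:11:22:33",
                                         constants.NIC_MODE_BRIDGED,
                                         "xen-br0")],
                               constants.DT_DRBD8, [(10240, "rw")],
                               {}, {}, "xen-pvm")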

def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        the form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates (and the file is outdated)" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary
      # has enough memory to host all the instances for which it is
      # secondary, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are only run in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      if msg:
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
        bad = True
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        if instance not in instanceinfo:
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
                      instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      result = self._VerifyNode(node_i, file_names, local_checksums,
                                nresult, feedback_fn, master_files,
                                node_drbd, vg_name)
      bad = bad or result

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, utils.SafeEncode(lvdata)))
        bad = True
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      if not isinstance(idata, list):
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
                    (node,))
        bad = True
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          if (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST]):
            feedback_fn("  - ERROR: node %s didn't return data for the"
                        " volume group '%s' - it is either missing or broken" %
                        (node, vg_name))
            bad = True
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
                    " from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      result =  self._VerifyInstance(instance, inst_config, node_volume,
                                     node_instance, feedback_fn, n_offline)
      bad = bad or result
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      elif pnode not in n_offline:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        elif snode not in n_offline:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))
          bad = True
        if snode in n_offline:
          inst_nodes_offline.append(snode)

      if inst_nodes_offline:
        # warn that the instance lives on offline nodes, and set bad=True
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
                    ", ".join(inst_nodes_offline))
        bad = True

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          msg = res.fail_msg
          if msg:
            if res.offline:
              # no need to warn or set fail return value
              continue
            feedback_fn("    Communication failure in hooks execution: %s" %
                        msg)
            lu_result = 1
            continue
          for script, hkr, output in res.payload:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result
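

# Editorial sketch, not part of the original module: the N+1 memory check of
# _VerifyNPlusOneMemory, restated on a tiny hand-made node_info structure.
# The node and instance names and the memory figures are invented.
def _ExampleNPlusOneCheck():
  """Illustrative N+1 memory computation (editorial example).

  """
  # node2 is secondary for inst1 and inst2, both having node1 as primary and
  # needing 2048 MiB each; if node1 fails, node2 must be able to absorb
  # 4096 MiB, but it only has 3000 MiB free, so the check would flag it.
  node_info = {
    "node2.example.com": {
      "mfree": 3000,
      "sinst-by-pnode": {
        "node1.example.com": ["inst1.example.com", "inst2.example.com"],
      },
    },
  }
  instance_mem = {"inst1.example.com": 2048, "inst2.example.com": 2048}
  for node, nodeinfo in node_info.iteritems():
    for prinode, instances in nodeinfo["sinst-by-pnode"].iteritems():
      needed_mem = sum(instance_mem[iname] for iname in instances)
      if nodeinfo["mfree"] < needed_mem:
        return "node %s cannot absorb failover of %s" % (node, prinode)
  return None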


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
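

# Editorial note, not part of the original module: the nv_dict built in
# LUVerifyDisks.Exec inverts the per-instance LV map. With invented names,
# MapLVsByNode would give something like
#   {"node1.example.com": ["disk0_data_lv", "disk0_meta_lv"]}
# for one instance, and nv_dict then becomes
#   {("node1.example.com", "disk0_data_lv"): inst,
#    ("node1.example.com", "disk0_meta_lv"): inst}
# so that a (node, volume) pair reported by call_lv_list can be popped in
# one step; whatever is left over at the end is a missing volume.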


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
1504

    
1505

    
1506
class LUSetClusterParams(LogicalUnit):
1507
  """Change the parameters of the cluster.
1508

1509
  """
1510
  HPATH = "cluster-modify"
1511
  HTYPE = constants.HTYPE_CLUSTER
1512
  _OP_REQP = []
1513
  REQ_BGL = False
1514

    
1515
  def CheckArguments(self):
1516
    """Check parameters
1517

1518
    """
1519
    if not hasattr(self.op, "candidate_pool_size"):
1520
      self.op.candidate_pool_size = None
1521
    if self.op.candidate_pool_size is not None:
1522
      try:
1523
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1524
      except (ValueError, TypeError), err:
1525
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1526
                                   str(err))
1527
      if self.op.candidate_pool_size < 1:
1528
        raise errors.OpPrereqError("At least one master candidate needed")
1529

    
1530
  def ExpandNames(self):
1531
    # FIXME: in the future maybe other cluster params won't require checking on
1532
    # all nodes to be modified.
1533
    self.needed_locks = {
1534
      locking.LEVEL_NODE: locking.ALL_SET,
1535
    }
1536
    self.share_locks[locking.LEVEL_NODE] = 1
1537

    
1538
  def BuildHooksEnv(self):
1539
    """Build hooks env.
1540

1541
    """
1542
    env = {
1543
      "OP_TARGET": self.cfg.GetClusterName(),
1544
      "NEW_VG_NAME": self.op.vg_name,
1545
      }
1546
    mn = self.cfg.GetMasterNode()
1547
    return env, [mn], [mn]
1548

    
1549
  def CheckPrereq(self):
1550
    """Check prerequisites.
1551

1552
    This checks whether the given params don't conflict and
1553
    if the given volume group is valid.
1554

1555
    """
1556
    if self.op.vg_name is not None and not self.op.vg_name:
1557
      instances = self.cfg.GetAllInstancesInfo().values()
1558
      for inst in instances:
1559
        for disk in inst.disks:
1560
          if _RecursiveCheckIfLVMBased(disk):
1561
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1562
                                       " lvm-based instances exist")
1563

    
1564
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1565

    
1566
    # if vg_name not None, checks given volume group on all nodes
1567
    if self.op.vg_name:
1568
      vglist = self.rpc.call_vg_list(node_list)
1569
      for node in node_list:
1570
        msg = vglist[node].fail_msg
1571
        if msg:
1572
          # ignoring down node
1573
          self.LogWarning("Error while gathering data on node %s"
1574
                          " (ignoring node): %s", node, msg)
1575
          continue
1576
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1577
                                              self.op.vg_name,
1578
                                              constants.MIN_VG_SIZE)
1579
        if vgstatus:
1580
          raise errors.OpPrereqError("Error on node '%s': %s" %
1581
                                     (node, vgstatus))
1582

    
1583
    self.cluster = cluster = self.cfg.GetClusterInfo()
1584
    # validate params changes
1585
    if self.op.beparams:
1586
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1587
      self.new_beparams = objects.FillDict(
1588
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1589

    
1590
    if self.op.nicparams:
1591
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1592
      self.new_nicparams = objects.FillDict(
1593
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1594
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1595

    
1596
    # hypervisor list/parameters
1597
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1598
    if self.op.hvparams:
1599
      if not isinstance(self.op.hvparams, dict):
1600
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1601
      for hv_name, hv_dict in self.op.hvparams.items():
1602
        if hv_name not in self.new_hvparams:
1603
          self.new_hvparams[hv_name] = hv_dict
1604
        else:
1605
          self.new_hvparams[hv_name].update(hv_dict)
1606

    
1607
    if self.op.enabled_hypervisors is not None:
1608
      self.hv_list = self.op.enabled_hypervisors
1609
      if not self.hv_list:
1610
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1611
                                   " least one member")
1612
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1613
      if invalid_hvs:
1614
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1615
                                   " entries: %s" % invalid_hvs)
1616
    else:
1617
      self.hv_list = cluster.enabled_hypervisors
1618

    
1619
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1620
      # either the enabled list has changed, or the parameters have, validate
1621
      for hv_name, hv_params in self.new_hvparams.items():
1622
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1623
            (self.op.enabled_hypervisors and
1624
             hv_name in self.op.enabled_hypervisors)):
1625
          # either this is a new hypervisor, or its parameters have changed
1626
          hv_class = hypervisor.GetHypervisor(hv_name)
1627
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1628
          hv_class.CheckParameterSyntax(hv_params)
1629
          _CheckHVParams(self, node_list, hv_name, hv_params)
1630

    
1631
  def Exec(self, feedback_fn):
1632
    """Change the parameters of the cluster.
1633

1634
    """
1635
    if self.op.vg_name is not None:
1636
      new_volume = self.op.vg_name
1637
      if not new_volume:
1638
        new_volume = None
1639
      if new_volume != self.cfg.GetVGName():
1640
        self.cfg.SetVGName(new_volume)
1641
      else:
1642
        feedback_fn("Cluster LVM configuration already in desired"
1643
                    " state, not changing")
1644
    if self.op.hvparams:
1645
      self.cluster.hvparams = self.new_hvparams
1646
    if self.op.enabled_hypervisors is not None:
1647
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1648
    if self.op.beparams:
1649
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1650
    if self.op.nicparams:
1651
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1652

    
1653
    if self.op.candidate_pool_size is not None:
1654
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1655
      # we need to update the pool size here, otherwise the save will fail
1656
      _AdjustCandidatePool(self)
1657

    
1658
    self.cfg.Update(self.cluster)
1659

    
1660

    
1661
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1662
  """Distribute additional files which are part of the cluster configuration.
1663

1664
  ConfigWriter takes care of distributing the config and ssconf files, but
1665
  there are more files which should be distributed to all nodes. This function
1666
  makes sure those are copied.
1667

1668
  @param lu: calling logical unit
1669
  @param additional_nodes: list of nodes not in the config to distribute to
1670

1671
  """
1672
  # 1. Gather target nodes
1673
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1674
  dist_nodes = lu.cfg.GetNodeList()
1675
  if additional_nodes is not None:
1676
    dist_nodes.extend(additional_nodes)
1677
  if myself.name in dist_nodes:
1678
    dist_nodes.remove(myself.name)
1679
  # 2. Gather files to distribute
1680
  dist_files = set([constants.ETC_HOSTS,
1681
                    constants.SSH_KNOWN_HOSTS_FILE,
1682
                    constants.RAPI_CERT_FILE,
1683
                    constants.RAPI_USERS_FILE,
1684
                    constants.HMAC_CLUSTER_KEY,
1685
                   ])
1686

    
1687
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1688
  for hv_name in enabled_hypervisors:
1689
    hv_class = hypervisor.GetHypervisor(hv_name)
1690
    dist_files.update(hv_class.GetAncillaryFiles())
1691

    
1692
  # 3. Perform the files upload
1693
  for fname in dist_files:
1694
    if os.path.exists(fname):
1695
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1696
      for to_node, to_result in result.items():
1697
        msg = to_result.fail_msg
1698
        if msg:
1699
          msg = ("Copy of file %s to node %s failed: %s" %
1700
                 (fname, to_node, msg))
1701
          lu.proc.LogWarning(msg)
1702

    
1703

    
1704
class LURedistributeConfig(NoHooksLU):
1705
  """Force the redistribution of cluster configuration.
1706

1707
  This is a very simple LU.
1708

1709
  """
1710
  _OP_REQP = []
1711
  REQ_BGL = False
1712

    
1713
  def ExpandNames(self):
1714
    self.needed_locks = {
1715
      locking.LEVEL_NODE: locking.ALL_SET,
1716
    }
1717
    self.share_locks[locking.LEVEL_NODE] = 1
1718

    
1719
  def CheckPrereq(self):
1720
    """Check prerequisites.
1721

1722
    """
1723

    
1724
  def Exec(self, feedback_fn):
1725
    """Redistribute the configuration.
1726

1727
    """
1728
    self.cfg.Update(self.cfg.GetClusterInfo())
1729
    _RedistributeAncillaryFiles(self)
1730

    
1731

    
1732
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1733
  """Sleep and poll for an instance's disk to sync.
1734

1735
  """
1736
  if not instance.disks:
1737
    return True
1738

    
1739
  if not oneshot:
1740
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1741

    
1742
  node = instance.primary_node
1743

    
1744
  for dev in instance.disks:
1745
    lu.cfg.SetDiskID(dev, node)
1746

    
1747
  retries = 0
1748
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1749
  while True:
1750
    max_time = 0
1751
    done = True
1752
    cumul_degraded = False
1753
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1754
    msg = rstats.fail_msg
1755
    if msg:
1756
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1757
      retries += 1
1758
      if retries >= 10:
1759
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1760
                                 " aborting." % node)
1761
      time.sleep(6)
1762
      continue
1763
    rstats = rstats.payload
1764
    retries = 0
1765
    for i, mstat in enumerate(rstats):
1766
      if mstat is None:
1767
        lu.LogWarning("Can't compute data for node %s/%s",
1768
                           node, instance.disks[i].iv_name)
1769
        continue
1770
      # we ignore the ldisk parameter
1771
      perc_done, est_time, is_degraded, _ = mstat
1772
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1773
      if perc_done is not None:
1774
        done = False
1775
        if est_time is not None:
1776
          rem_time = "%d estimated seconds remaining" % est_time
1777
          max_time = est_time
1778
        else:
1779
          rem_time = "no time estimate"
1780
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1781
                        (instance.disks[i].iv_name, perc_done, rem_time))
1782

    
1783
    # if we're done but degraded, let's do a few small retries, to
1784
    # make sure we see a stable and not transient situation; therefore
1785
    # we force restart of the loop
1786
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1787
      logging.info("Degraded disks found, %d retries left", degr_retries)
1788
      degr_retries -= 1
1789
      time.sleep(1)
1790
      continue
1791

    
1792
    if done or oneshot:
1793
      break
1794

    
1795
    time.sleep(min(60, max_time))
1796

    
1797
  if done:
1798
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1799
  return not cumul_degraded
1800

    
1801

    
1802
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1803
  """Check that mirrors are not degraded.
1804

1805
  The ldisk parameter, if True, will change the test from the
1806
  is_degraded attribute (which represents overall non-ok status for
1807
  the device(s)) to the ldisk (representing the local storage status).
1808

1809
  """
1810
  lu.cfg.SetDiskID(dev, node)
1811
  if ldisk:
1812
    idx = 6
1813
  else:
1814
    idx = 5
1815

    
1816
  result = True
1817
  if on_primary or dev.AssembleOnSecondary():
1818
    rstats = lu.rpc.call_blockdev_find(node, dev)
1819
    msg = rstats.fail_msg
1820
    if msg:
1821
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1822
      result = False
1823
    elif not rstats.payload:
1824
      lu.LogWarning("Can't find disk on node %s", node)
1825
      result = False
1826
    else:
1827
      result = result and (not rstats.payload[idx])
1828
  if dev.children:
1829
    for child in dev.children:
1830
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1831

    
1832
  return result
1833

    
1834

    
1835
class LUDiagnoseOS(NoHooksLU):
1836
  """Logical unit for OS diagnose/query.
1837

1838
  """
1839
  _OP_REQP = ["output_fields", "names"]
1840
  REQ_BGL = False
1841
  _FIELDS_STATIC = utils.FieldSet()
1842
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1843

    
1844
  def ExpandNames(self):
1845
    if self.op.names:
1846
      raise errors.OpPrereqError("Selective OS query not supported")
1847

    
1848
    _CheckOutputFields(static=self._FIELDS_STATIC,
1849
                       dynamic=self._FIELDS_DYNAMIC,
1850
                       selected=self.op.output_fields)
1851

    
1852
    # Lock all nodes, in shared mode
1853
    # Temporary removal of locks, should be reverted later
1854
    # TODO: reintroduce locks when they are lighter-weight
1855
    self.needed_locks = {}
1856
    #self.share_locks[locking.LEVEL_NODE] = 1
1857
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1858

    
1859
  def CheckPrereq(self):
1860
    """Check prerequisites.
1861

1862
    """
1863

    
1864
  @staticmethod
1865
  def _DiagnoseByOS(node_list, rlist):
1866
    """Remaps a per-node return list into an a per-os per-node dictionary
1867

1868
    @param node_list: a list with the names of all nodes
1869
    @param rlist: a map with node names as keys and OS objects as values
1870

1871
    @rtype: dict
1872
    @return: a dictionary with osnames as keys and as value another map, with
1873
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
1874

1875
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
1876
                                     (/srv/..., False, "invalid api")],
1877
                           "node2": [(/srv/..., True, "")]}
1878
          }
1879

1880
    """
1881
    all_os = {}
1882
    # we build here the list of nodes that didn't fail the RPC (at RPC
1883
    # level), so that nodes with a non-responding node daemon don't
1884
    # make all OSes invalid
1885
    good_nodes = [node_name for node_name in rlist
1886
                  if not rlist[node_name].fail_msg]
1887
    for node_name, nr in rlist.items():
1888
      if nr.fail_msg or not nr.payload:
1889
        continue
1890
      for name, path, status, diagnose in nr.payload:
1891
        if name not in all_os:
1892
          # build a list of nodes for this os containing empty lists
1893
          # for each node in node_list
1894
          all_os[name] = {}
1895
          for nname in good_nodes:
1896
            all_os[name][nname] = []
1897
        all_os[name][node_name].append((path, status, diagnose))
1898
    return all_os
1899

    
1900
  def Exec(self, feedback_fn):
1901
    """Compute the list of OSes.
1902

1903
    """
1904
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1905
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1906
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1907
    output = []
1908
    for os_name, os_data in pol.items():
1909
      row = []
1910
      for field in self.op.output_fields:
1911
        if field == "name":
1912
          val = os_name
1913
        elif field == "valid":
1914
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
1915
        elif field == "node_status":
1916
          # this is just a copy of the dict
1917
          val = {}
1918
          for node_name, nos_list in os_data.items():
1919
            val[node_name] = nos_list
1920
        else:
1921
          raise errors.ParameterError(field)
1922
        row.append(val)
1923
      output.append(row)
1924

    
1925
    return output
1926

    
1927

    
1928
class LURemoveNode(LogicalUnit):
1929
  """Logical unit for removing a node.
1930

1931
  """
1932
  HPATH = "node-remove"
1933
  HTYPE = constants.HTYPE_NODE
1934
  _OP_REQP = ["node_name"]
1935

    
1936
  def BuildHooksEnv(self):
1937
    """Build hooks env.
1938

1939
    This doesn't run on the target node in the pre phase as a failed
1940
    node would then be impossible to remove.
1941

1942
    """
1943
    env = {
1944
      "OP_TARGET": self.op.node_name,
1945
      "NODE_NAME": self.op.node_name,
1946
      }
1947
    all_nodes = self.cfg.GetNodeList()
1948
    all_nodes.remove(self.op.node_name)
1949
    return env, all_nodes, all_nodes
1950

    
1951
  def CheckPrereq(self):
1952
    """Check prerequisites.
1953

1954
    This checks:
1955
     - the node exists in the configuration
1956
     - it does not have primary or secondary instances
1957
     - it's not the master
1958

1959
    Any errors are signaled by raising errors.OpPrereqError.
1960

1961
    """
1962
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1963
    if node is None:
1964
      raise errors.OpPrereqError, ("Node '%s' is unknown." % self.op.node_name)
1965

    
1966
    instance_list = self.cfg.GetInstanceList()
1967

    
1968
    masternode = self.cfg.GetMasterNode()
1969
    if node.name == masternode:
1970
      raise errors.OpPrereqError("Node is the master node,"
1971
                                 " you need to failover first.")
1972

    
1973
    for instance_name in instance_list:
1974
      instance = self.cfg.GetInstanceInfo(instance_name)
1975
      if node.name in instance.all_nodes:
1976
        raise errors.OpPrereqError("Instance %s is still running on the node,"
1977
                                   " please remove first." % instance_name)
1978
    self.op.node_name = node.name
1979
    self.node = node
1980

    
1981
  def Exec(self, feedback_fn):
1982
    """Removes the node from the cluster.
1983

1984
    """
1985
    node = self.node
1986
    logging.info("Stopping the node daemon and removing configs from node %s",
1987
                 node.name)
1988

    
1989
    self.context.RemoveNode(node.name)
1990

    
1991
    result = self.rpc.call_node_leave_cluster(node.name)
1992
    msg = result.fail_msg
1993
    if msg:
1994
      self.LogWarning("Errors encountered on the remote node while leaving"
1995
                      " the cluster: %s", msg)
1996

    
1997
    # Promote nodes to master candidate as needed
1998
    _AdjustCandidatePool(self)
1999

    
2000

    
2001
class LUQueryNodes(NoHooksLU):
2002
  """Logical unit for querying nodes.
2003

2004
  """
2005
  _OP_REQP = ["output_fields", "names", "use_locking"]
2006
  REQ_BGL = False
2007
  _FIELDS_DYNAMIC = utils.FieldSet(
2008
    "dtotal", "dfree",
2009
    "mtotal", "mnode", "mfree",
2010
    "bootid",
2011
    "ctotal", "cnodes", "csockets",
2012
    )
2013

    
2014
  _FIELDS_STATIC = utils.FieldSet(
2015
    "name", "pinst_cnt", "sinst_cnt",
2016
    "pinst_list", "sinst_list",
2017
    "pip", "sip", "tags",
2018
    "serial_no",
2019
    "master_candidate",
2020
    "master",
2021
    "offline",
2022
    "drained",
2023
    "role",
2024
    )
2025

    
2026
  def ExpandNames(self):
2027
    _CheckOutputFields(static=self._FIELDS_STATIC,
2028
                       dynamic=self._FIELDS_DYNAMIC,
2029
                       selected=self.op.output_fields)
2030

    
2031
    self.needed_locks = {}
2032
    self.share_locks[locking.LEVEL_NODE] = 1
2033

    
2034
    if self.op.names:
2035
      self.wanted = _GetWantedNodes(self, self.op.names)
2036
    else:
2037
      self.wanted = locking.ALL_SET
2038

    
2039
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2040
    self.do_locking = self.do_node_query and self.op.use_locking
2041
    if self.do_locking:
2042
      # if we don't request only static fields, we need to lock the nodes
2043
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2044

    
2045

    
2046
  def CheckPrereq(self):
2047
    """Check prerequisites.
2048

2049
    """
2050
    # The validation of the node list is done in the _GetWantedNodes,
2051
    # if non empty, and if empty, there's no validation to do
2052
    pass
2053

    
2054
  def Exec(self, feedback_fn):
2055
    """Computes the list of nodes and their attributes.
2056

2057
    """
2058
    all_info = self.cfg.GetAllNodesInfo()
2059
    if self.do_locking:
2060
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2061
    elif self.wanted != locking.ALL_SET:
2062
      nodenames = self.wanted
2063
      missing = set(nodenames).difference(all_info.keys())
2064
      if missing:
2065
        raise errors.OpExecError(
2066
          "Some nodes were removed before retrieving their data: %s" % missing)
2067
    else:
2068
      nodenames = all_info.keys()
2069

    
2070
    nodenames = utils.NiceSort(nodenames)
2071
    nodelist = [all_info[name] for name in nodenames]
2072

    
2073
    # begin data gathering
2074

    
2075
    if self.do_node_query:
2076
      live_data = {}
2077
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2078
                                          self.cfg.GetHypervisorType())
2079
      for name in nodenames:
2080
        nodeinfo = node_data[name]
2081
        if not nodeinfo.fail_msg and nodeinfo.payload:
2082
          nodeinfo = nodeinfo.payload
2083
          fn = utils.TryConvert
2084
          live_data[name] = {
2085
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2086
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2087
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2088
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2089
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2090
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2091
            "bootid": nodeinfo.get('bootid', None),
2092
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2093
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2094
            }
2095
        else:
2096
          live_data[name] = {}
2097
    else:
2098
      live_data = dict.fromkeys(nodenames, {})
2099

    
2100
    node_to_primary = dict([(name, set()) for name in nodenames])
2101
    node_to_secondary = dict([(name, set()) for name in nodenames])
2102

    
2103
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2104
                             "sinst_cnt", "sinst_list"))
2105
    if inst_fields & frozenset(self.op.output_fields):
2106
      instancelist = self.cfg.GetInstanceList()
2107

    
2108
      for instance_name in instancelist:
2109
        inst = self.cfg.GetInstanceInfo(instance_name)
2110
        if inst.primary_node in node_to_primary:
2111
          node_to_primary[inst.primary_node].add(inst.name)
2112
        for secnode in inst.secondary_nodes:
2113
          if secnode in node_to_secondary:
2114
            node_to_secondary[secnode].add(inst.name)
2115

    
2116
    master_node = self.cfg.GetMasterNode()
2117

    
2118
    # end data gathering
2119

    
2120
    output = []
2121
    for node in nodelist:
2122
      node_output = []
2123
      for field in self.op.output_fields:
2124
        if field == "name":
2125
          val = node.name
2126
        elif field == "pinst_list":
2127
          val = list(node_to_primary[node.name])
2128
        elif field == "sinst_list":
2129
          val = list(node_to_secondary[node.name])
2130
        elif field == "pinst_cnt":
2131
          val = len(node_to_primary[node.name])
2132
        elif field == "sinst_cnt":
2133
          val = len(node_to_secondary[node.name])
2134
        elif field == "pip":
2135
          val = node.primary_ip
2136
        elif field == "sip":
2137
          val = node.secondary_ip
2138
        elif field == "tags":
2139
          val = list(node.GetTags())
2140
        elif field == "serial_no":
2141
          val = node.serial_no
2142
        elif field == "master_candidate":
2143
          val = node.master_candidate
2144
        elif field == "master":
2145
          val = node.name == master_node
2146
        elif field == "offline":
2147
          val = node.offline
2148
        elif field == "drained":
2149
          val = node.drained
2150
        elif self._FIELDS_DYNAMIC.Matches(field):
2151
          val = live_data[node.name].get(field, None)
2152
        elif field == "role":
2153
          if node.name == master_node:
2154
            val = "M"
2155
          elif node.master_candidate:
2156
            val = "C"
2157
          elif node.drained:
2158
            val = "D"
2159
          elif node.offline:
2160
            val = "O"
2161
          else:
2162
            val = "R"
2163
        else:
2164
          raise errors.ParameterError(field)
2165
        node_output.append(val)
2166
      output.append(node_output)
2167

    
2168
    return output
2169

    
2170

    
2171
class LUQueryNodeVolumes(NoHooksLU):
2172
  """Logical unit for getting volumes on node(s).
2173

2174
  """
2175
  _OP_REQP = ["nodes", "output_fields"]
2176
  REQ_BGL = False
2177
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2178
  _FIELDS_STATIC = utils.FieldSet("node")
2179

    
2180
  def ExpandNames(self):
2181
    _CheckOutputFields(static=self._FIELDS_STATIC,
2182
                       dynamic=self._FIELDS_DYNAMIC,
2183
                       selected=self.op.output_fields)
2184

    
2185
    self.needed_locks = {}
2186
    self.share_locks[locking.LEVEL_NODE] = 1
2187
    if not self.op.nodes:
2188
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2189
    else:
2190
      self.needed_locks[locking.LEVEL_NODE] = \
2191
        _GetWantedNodes(self, self.op.nodes)
2192

    
2193
  def CheckPrereq(self):
2194
    """Check prerequisites.
2195

2196
    This checks that the fields required are valid output fields.
2197

2198
    """
2199
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2200

    
2201
  def Exec(self, feedback_fn):
2202
    """Computes the list of nodes and their attributes.
2203

2204
    """
2205
    nodenames = self.nodes
2206
    volumes = self.rpc.call_node_volumes(nodenames)
2207

    
2208
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2209
             in self.cfg.GetInstanceList()]
2210

    
2211
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2212

    
2213
    output = []
2214
    for node in nodenames:
2215
      nresult = volumes[node]
2216
      if nresult.offline:
2217
        continue
2218
      msg = nresult.fail_msg
2219
      if msg:
2220
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2221
        continue
2222

    
2223
      node_vols = nresult.payload[:]
2224
      node_vols.sort(key=lambda vol: vol['dev'])
2225

    
2226
      for vol in node_vols:
2227
        node_output = []
2228
        for field in self.op.output_fields:
2229
          if field == "node":
2230
            val = node
2231
          elif field == "phys":
2232
            val = vol['dev']
2233
          elif field == "vg":
2234
            val = vol['vg']
2235
          elif field == "name":
2236
            val = vol['name']
2237
          elif field == "size":
2238
            val = int(float(vol['size']))
2239
          elif field == "instance":
2240
            for inst in ilist:
2241
              if node not in lv_by_node[inst]:
2242
                continue
2243
              if vol['name'] in lv_by_node[inst][node]:
2244
                val = inst.name
2245
                break
2246
            else:
2247
              val = '-'
2248
          else:
2249
            raise errors.ParameterError(field)
2250
          node_output.append(str(val))
2251

    
2252
        output.append(node_output)
2253

    
2254
    return output
2255

    
2256

    
2257
class LUAddNode(LogicalUnit):
2258
  """Logical unit for adding node to the cluster.
2259

2260
  """
2261
  HPATH = "node-add"
2262
  HTYPE = constants.HTYPE_NODE
2263
  _OP_REQP = ["node_name"]
2264

    
2265
  def BuildHooksEnv(self):
2266
    """Build hooks env.
2267

2268
    This will run on all nodes before, and on all nodes + the new node after.
2269

2270
    """
2271
    env = {
2272
      "OP_TARGET": self.op.node_name,
2273
      "NODE_NAME": self.op.node_name,
2274
      "NODE_PIP": self.op.primary_ip,
2275
      "NODE_SIP": self.op.secondary_ip,
2276
      }
2277
    nodes_0 = self.cfg.GetNodeList()
2278
    nodes_1 = nodes_0 + [self.op.node_name, ]
2279
    return env, nodes_0, nodes_1
2280

    
2281
  def CheckPrereq(self):
2282
    """Check prerequisites.
2283

2284
    This checks:
2285
     - the new node is not already in the config
2286
     - it is resolvable
2287
     - its parameters (single/dual homed) matches the cluster
2288

2289
    Any errors are signaled by raising errors.OpPrereqError.
2290

2291
    """
2292
    node_name = self.op.node_name
2293
    cfg = self.cfg
2294

    
2295
    dns_data = utils.HostInfo(node_name)
2296

    
2297
    node = dns_data.name
2298
    primary_ip = self.op.primary_ip = dns_data.ip
2299
    secondary_ip = getattr(self.op, "secondary_ip", None)
2300
    if secondary_ip is None:
2301
      secondary_ip = primary_ip
2302
    if not utils.IsValidIP(secondary_ip):
2303
      raise errors.OpPrereqError("Invalid secondary IP given")
2304
    self.op.secondary_ip = secondary_ip
2305

    
2306
    node_list = cfg.GetNodeList()
2307
    if not self.op.readd and node in node_list:
2308
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2309
                                 node)
2310
    elif self.op.readd and node not in node_list:
2311
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2312

    
2313
    for existing_node_name in node_list:
2314
      existing_node = cfg.GetNodeInfo(existing_node_name)
2315

    
2316
      if self.op.readd and node == existing_node_name:
2317
        if (existing_node.primary_ip != primary_ip or
2318
            existing_node.secondary_ip != secondary_ip):
2319
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2320
                                     " address configuration as before")
2321
        continue
2322

    
2323
      if (existing_node.primary_ip == primary_ip or
2324
          existing_node.secondary_ip == primary_ip or
2325
          existing_node.primary_ip == secondary_ip or
2326
          existing_node.secondary_ip == secondary_ip):
2327
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2328
                                   " existing node %s" % existing_node.name)
2329

    
2330
    # check that the type of the node (single versus dual homed) is the
2331
    # same as for the master
2332
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2333
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2334
    newbie_singlehomed = secondary_ip == primary_ip
2335
    if master_singlehomed != newbie_singlehomed:
2336
      if master_singlehomed:
2337
        raise errors.OpPrereqError("The master has no private ip but the"
2338
                                   " new node has one")
2339
      else:
2340
        raise errors.OpPrereqError("The master has a private ip but the"
2341
                                   " new node doesn't have one")
2342

    
2343
    # checks reachability
2344
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2345
      raise errors.OpPrereqError("Node not reachable by ping")
2346

    
2347
    if not newbie_singlehomed:
2348
      # check reachability from my secondary ip to newbie's secondary ip
2349
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2350
                           source=myself.secondary_ip):
2351
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2352
                                   " based ping to noded port")
2353

    
2354
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2355
    if self.op.readd:
2356
      exceptions = [node]
2357
    else:
2358
      exceptions = []
2359
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2360
    # the new node will increase mc_max with one, so:
2361
    mc_max = min(mc_max + 1, cp_size)
2362
    self.master_candidate = mc_now < mc_max
2363

    
2364
    if self.op.readd:
2365
      self.new_node = self.cfg.GetNodeInfo(node)
2366
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2367
    else:
2368
      self.new_node = objects.Node(name=node,
2369
                                   primary_ip=primary_ip,
2370
                                   secondary_ip=secondary_ip,
2371
                                   master_candidate=self.master_candidate,
2372
                                   offline=False, drained=False)
2373

    
2374
  def Exec(self, feedback_fn):
2375
    """Adds the new node to the cluster.
2376

2377
    """
2378
    new_node = self.new_node
2379
    node = new_node.name
2380

    
2381
    # for re-adds, reset the offline/drained/master-candidate flags;
2382
    # we need to reset here, otherwise offline would prevent RPC calls
2383
    # later in the procedure; this also means that if the re-add
2384
    # fails, we are left with a non-offlined, broken node
2385
    if self.op.readd:
2386
      new_node.drained = new_node.offline = False
2387
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2388
      # if we demote the node, we do cleanup later in the procedure
2389
      new_node.master_candidate = self.master_candidate
2390

    
2391
    # notify the user about any possible mc promotion
2392
    if new_node.master_candidate:
2393
      self.LogInfo("Node will be a master candidate")
2394

    
2395
    # check connectivity
2396
    result = self.rpc.call_version([node])[node]
2397
    result.Raise("Can't get version information from node %s" % node)
2398
    if constants.PROTOCOL_VERSION == result.payload:
2399
      logging.info("Communication to node %s fine, sw version %s match",
2400
                   node, result.payload)
2401
    else:
2402
      raise errors.OpExecError("Version mismatch master version %s,"
2403
                               " node version %s" %
2404
                               (constants.PROTOCOL_VERSION, result.payload))
2405

    
2406
    # setup ssh on node
2407
    logging.info("Copy ssh key to node %s", node)
2408
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2409
    keyarray = []
2410
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2411
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2412
                priv_key, pub_key]
2413

    
2414
    for i in keyfiles:
2415
      f = open(i, 'r')
2416
      try:
2417
        keyarray.append(f.read())
2418
      finally:
2419
        f.close()
2420

    
2421
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2422
                                    keyarray[2],
2423
                                    keyarray[3], keyarray[4], keyarray[5])
2424
    result.Raise("Cannot transfer ssh keys to the new node")
2425

    
2426
    # Add node to our /etc/hosts, and add key to known_hosts
2427
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2428
      utils.AddHostToEtcHosts(new_node.name)
2429

    
2430
    if new_node.secondary_ip != new_node.primary_ip:
2431
      result = self.rpc.call_node_has_ip_address(new_node.name,
2432
                                                 new_node.secondary_ip)
2433
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2434
                   prereq=True)
2435
      if not result.payload:
2436
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2437
                                 " you gave (%s). Please fix and re-run this"
2438
                                 " command." % new_node.secondary_ip)
2439

    
2440
    node_verify_list = [self.cfg.GetMasterNode()]
2441
    node_verify_param = {
2442
      'nodelist': [node],
2443
      # TODO: do a node-net-test as well?
2444
    }
2445

    
2446
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2447
                                       self.cfg.GetClusterName())
2448
    for verifier in node_verify_list:
2449
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2450
      nl_payload = result[verifier].payload['nodelist']
2451
      if nl_payload:
2452
        for failed in nl_payload:
2453
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2454
                      (verifier, nl_payload[failed]))
2455
        raise errors.OpExecError("ssh/hostname verification failed.")
2456

    
2457
    if self.op.readd:
2458
      _RedistributeAncillaryFiles(self)
2459
      self.context.ReaddNode(new_node)
2460
      # make sure we redistribute the config
2461
      self.cfg.Update(new_node)
2462
      # and make sure the new node will not have old files around
2463
      if not new_node.master_candidate:
2464
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2465
        msg = result.RemoteFailMsg()
2466
        if msg:
2467
          self.LogWarning("Node failed to demote itself from master"
2468
                          " candidate status: %s" % msg)
2469
    else:
2470
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2471
      self.context.AddNode(new_node)
2472

    
2473

    
2474
class LUSetNodeParams(LogicalUnit):
2475
  """Modifies the parameters of a node.
2476

2477
  """
2478
  HPATH = "node-modify"
2479
  HTYPE = constants.HTYPE_NODE
2480
  _OP_REQP = ["node_name"]
2481
  REQ_BGL = False
2482

    
2483
  def CheckArguments(self):
2484
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2485
    if node_name is None:
2486
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2487
    self.op.node_name = node_name
2488
    _CheckBooleanOpField(self.op, 'master_candidate')
2489
    _CheckBooleanOpField(self.op, 'offline')
2490
    _CheckBooleanOpField(self.op, 'drained')
2491
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2492
    if all_mods.count(None) == 3:
2493
      raise errors.OpPrereqError("Please pass at least one modification")
2494
    if all_mods.count(True) > 1:
2495
      raise errors.OpPrereqError("Can't set the node into more than one"
2496
                                 " state at the same time")
2497

    
2498
  def ExpandNames(self):
2499
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2500

    
2501
  def BuildHooksEnv(self):
2502
    """Build hooks env.
2503

2504
    This runs on the master node.
2505

2506
    """
2507
    env = {
2508
      "OP_TARGET": self.op.node_name,
2509
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2510
      "OFFLINE": str(self.op.offline),
2511
      "DRAINED": str(self.op.drained),
2512
      }
2513
    nl = [self.cfg.GetMasterNode(),
2514
          self.op.node_name]
2515
    return env, nl, nl
2516

    
2517
  def CheckPrereq(self):
2518
    """Check prerequisites.
2519

2520
    This only checks the instance list against the existing names.
2521

2522
    """
2523
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2524

    
2525
    if ((self.op.master_candidate == False or self.op.offline == True or
2526
         self.op.drained == True) and node.master_candidate):
2527
      # we will demote the node from master_candidate
2528
      if self.op.node_name == self.cfg.GetMasterNode():
2529
        raise errors.OpPrereqError("The master node has to be a"
2530
                                   " master candidate, online and not drained")
2531
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2532
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2533
      if num_candidates <= cp_size:
2534
        msg = ("Not enough master candidates (desired"
2535
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2536
        if self.op.force:
2537
          self.LogWarning(msg)
2538
        else:
2539
          raise errors.OpPrereqError(msg)
2540

    
2541
    if (self.op.master_candidate == True and
2542
        ((node.offline and not self.op.offline == False) or
2543
         (node.drained and not self.op.drained == False))):
2544
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2545
                                 " to master_candidate" % node.name)
2546

    
2547
    return
2548

    
2549
  def Exec(self, feedback_fn):
2550
    """Modifies a node.
2551

2552
    """
2553
    node = self.node
2554

    
2555
    result = []
2556
    changed_mc = False
2557

    
2558
    if self.op.offline is not None:
2559
      node.offline = self.op.offline
2560
      result.append(("offline", str(self.op.offline)))
2561
      if self.op.offline == True:
2562
        if node.master_candidate:
2563
          node.master_candidate = False
2564
          changed_mc = True
2565
          result.append(("master_candidate", "auto-demotion due to offline"))
2566
        if node.drained:
2567
          node.drained = False
2568
          result.append(("drained", "clear drained status due to offline"))
2569

    
2570
    if self.op.master_candidate is not None:
2571
      node.master_candidate = self.op.master_candidate
2572
      changed_mc = True
2573
      result.append(("master_candidate", str(self.op.master_candidate)))
2574
      if self.op.master_candidate == False:
2575
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2576
        msg = rrc.fail_msg
2577
        if msg:
2578
          self.LogWarning("Node failed to demote itself: %s" % msg)
2579

    
2580
    if self.op.drained is not None:
2581
      node.drained = self.op.drained
2582
      result.append(("drained", str(self.op.drained)))
2583
      if self.op.drained == True:
2584
        if node.master_candidate:
2585
          node.master_candidate = False
2586
          changed_mc = True
2587
          result.append(("master_candidate", "auto-demotion due to drain"))
2588
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2589
          msg = rrc.RemoteFailMsg()
2590
          if msg:
2591
            self.LogWarning("Node failed to demote itself: %s" % msg)
2592
        if node.offline:
2593
          node.offline = False
2594
          result.append(("offline", "clear offline status due to drain"))
2595

    
2596
    # this will trigger configuration file update, if needed
2597
    self.cfg.Update(node)
2598
    # this will trigger job queue propagation or cleanup
2599
    if changed_mc:
2600
      self.context.ReaddNode(node)
2601

    
2602
    return result
2603

    
2604

    
2605
class LUPowercycleNode(NoHooksLU):
2606
  """Powercycles a node.
2607

2608
  """
2609
  _OP_REQP = ["node_name", "force"]
2610
  REQ_BGL = False
2611

    
2612
  def CheckArguments(self):
2613
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2614
    if node_name is None:
2615
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2616
    self.op.node_name = node_name
2617
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2618
      raise errors.OpPrereqError("The node is the master and the force"
2619
                                 " parameter was not set")
2620

    
2621
  def ExpandNames(self):
2622
    """Locking for PowercycleNode.
2623

2624
    This is a last-resource option and shouldn't block on other
2625
    jobs. Therefore, we grab no locks.
2626

2627
    """
2628
    self.needed_locks = {}
2629

    
2630
  def CheckPrereq(self):
2631
    """Check prerequisites.
2632

2633
    This LU has no prereqs.
2634

2635
    """
2636
    pass
2637

    
2638
  def Exec(self, feedback_fn):
2639
    """Reboots a node.
2640

2641
    """
2642
    result = self.rpc.call_node_powercycle(self.op.node_name,
2643
                                           self.cfg.GetHypervisorType())
2644
    result.Raise("Failed to schedule the reboot")
2645
    return result.payload
2646

    
2647

    
2648
class LUQueryClusterInfo(NoHooksLU):
2649
  """Query cluster configuration.
2650

2651
  """
2652
  _OP_REQP = []
2653
  REQ_BGL = False
2654

    
2655
  def ExpandNames(self):
2656
    self.needed_locks = {}
2657

    
2658
  def CheckPrereq(self):
2659
    """No prerequsites needed for this LU.
2660

2661
    """
2662
    pass
2663

    
2664
  def Exec(self, feedback_fn):
2665
    """Return cluster config.
2666

2667
    """
2668
    cluster = self.cfg.GetClusterInfo()
2669
    result = {
2670
      "software_version": constants.RELEASE_VERSION,
2671
      "protocol_version": constants.PROTOCOL_VERSION,
2672
      "config_version": constants.CONFIG_VERSION,
2673
      "os_api_version": max(constants.OS_API_VERSIONS),
2674
      "export_version": constants.EXPORT_VERSION,
2675
      "architecture": (platform.architecture()[0], platform.machine()),
2676
      "name": cluster.cluster_name,
2677
      "master": cluster.master_node,
2678
      "default_hypervisor": cluster.enabled_hypervisors[0],
2679
      "enabled_hypervisors": cluster.enabled_hypervisors,
2680
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
2681
                        for hypervisor_name in cluster.enabled_hypervisors]),
2682
      "beparams": cluster.beparams,
2683
      "nicparams": cluster.nicparams,
2684
      "candidate_pool_size": cluster.candidate_pool_size,
2685
      "master_netdev": cluster.master_netdev,
2686
      "volume_group_name": cluster.volume_group_name,
2687
      "file_storage_dir": cluster.file_storage_dir,
2688
      }
2689

    
2690
    return result
2691

    
2692

    
2693
class LUQueryConfigValues(NoHooksLU):
2694
  """Return configuration values.
2695

2696
  """
2697
  _OP_REQP = []
2698
  REQ_BGL = False
2699
  _FIELDS_DYNAMIC = utils.FieldSet()
2700
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2701

    
2702
  def ExpandNames(self):
2703
    self.needed_locks = {}
2704

    
2705
    _CheckOutputFields(static=self._FIELDS_STATIC,
2706
                       dynamic=self._FIELDS_DYNAMIC,
2707
                       selected=self.op.output_fields)
2708

    
2709
  def CheckPrereq(self):
2710
    """No prerequisites.
2711

2712
    """
2713
    pass
2714

    
2715
  def Exec(self, feedback_fn):
2716
    """Dump a representation of the cluster config to the standard output.
2717

2718
    """
2719
    values = []
2720
    for field in self.op.output_fields:
2721
      if field == "cluster_name":
2722
        entry = self.cfg.GetClusterName()
2723
      elif field == "master_node":
2724
        entry = self.cfg.GetMasterNode()
2725
      elif field == "drain_flag":
2726
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2727
      else:
2728
        raise errors.ParameterError(field)
2729
      values.append(entry)
2730
    return values
2731

    
2732

    
2733
class LUActivateInstanceDisks(NoHooksLU):
2734
  """Bring up an instance's disks.
2735

2736
  """
2737
  _OP_REQP = ["instance_name"]
2738
  REQ_BGL = False
2739

    
2740
  def ExpandNames(self):
2741
    self._ExpandAndLockInstance()
2742
    self.needed_locks[locking.LEVEL_NODE] = []
2743
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2744

    
2745
  def DeclareLocks(self, level):
2746
    if level == locking.LEVEL_NODE:
2747
      self._LockInstancesNodes()
2748

    
2749
  def CheckPrereq(self):
2750
    """Check prerequisites.
2751

2752
    This checks that the instance is in the cluster.
2753

2754
    """
2755
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2756
    assert self.instance is not None, \
2757
      "Cannot retrieve locked instance %s" % self.op.instance_name
2758
    _CheckNodeOnline(self, self.instance.primary_node)
2759

    
2760
  def Exec(self, feedback_fn):
2761
    """Activate the disks.
2762

2763
    """
2764
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2765
    if not disks_ok:
2766
      raise errors.OpExecError("Cannot activate block devices")
2767

    
2768
    return disks_info
2769

    
2770

    
2771
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2772
  """Prepare the block devices for an instance.
2773

2774
  This sets up the block devices on all nodes.
2775

2776
  @type lu: L{LogicalUnit}
2777
  @param lu: the logical unit on whose behalf we execute
2778
  @type instance: L{objects.Instance}
2779
  @param instance: the instance for whose disks we assemble
2780
  @type ignore_secondaries: boolean
2781
  @param ignore_secondaries: if true, errors on secondary nodes
2782
      won't result in an error return from the function
2783
  @return: False if the operation failed, otherwise a list of
2784
      (host, instance_visible_name, node_visible_name)
2785
      with the mapping from node devices to instance devices
2786

2787
  """
2788
  device_info = []
2789
  disks_ok = True
2790
  iname = instance.name
2791
  # With the two passes mechanism we try to reduce the window of
2792
  # opportunity for the race condition of switching DRBD to primary
2793
  # before handshaking occured, but we do not eliminate it
2794

    
2795
  # The proper fix would be to wait (with some limits) until the
2796
  # connection has been made and drbd transitions from WFConnection
2797
  # into any other network-connected state (Connected, SyncTarget,
2798
  # SyncSource, etc.)
2799

    
2800
  # 1st pass, assemble on all nodes in secondary mode
2801
  for inst_disk in instance.disks:
2802
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2803
      lu.cfg.SetDiskID(node_disk, node)
2804
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2805
      msg = result.fail_msg
2806
      if msg:
2807
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2808
                           " (is_primary=False, pass=1): %s",
2809
                           inst_disk.iv_name, node, msg)
2810
        if not ignore_secondaries:
2811
          disks_ok = False
2812

    
2813
  # FIXME: race condition on drbd migration to primary
2814

    
2815
  # 2nd pass, do only the primary node
2816
  for inst_disk in instance.disks:
2817
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2818
      if node != instance.primary_node:
2819
        continue
2820
      lu.cfg.SetDiskID(node_disk, node)
2821
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2822
      msg = result.fail_msg
2823
      if msg:
2824
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2825
                           " (is_primary=True, pass=2): %s",
2826
                           inst_disk.iv_name, node, msg)
2827
        disks_ok = False
2828
    device_info.append((instance.primary_node, inst_disk.iv_name,
2829
                        result.payload))
2830

    
2831
  # leave the disks configured for the primary node
2832
  # this is a workaround that would be fixed better by
2833
  # improving the logical/physical id handling
2834
  for disk in instance.disks:
2835
    lu.cfg.SetDiskID(disk, instance.primary_node)
2836

    
2837
  return disks_ok, device_info
2838

    
2839

    
2840
def _StartInstanceDisks(lu, instance, force):
2841
  """Start the disks of an instance.
2842

2843
  """
2844
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
2845
                                           ignore_secondaries=force)
2846
  if not disks_ok:
2847
    _ShutdownInstanceDisks(lu, instance)
2848
    if force is not None and not force:
2849
      lu.proc.LogWarning("", hint="If the message above refers to a"
2850
                         " secondary node,"
2851
                         " you can retry the operation using '--force'.")
2852
    raise errors.OpExecError("Disk consistency error")
2853

    
2854

    
2855
class LUDeactivateInstanceDisks(NoHooksLU):
2856
  """Shutdown an instance's disks.
2857

2858
  """
2859
  _OP_REQP = ["instance_name"]
2860
  REQ_BGL = False
2861

    
2862
  def ExpandNames(self):
2863
    self._ExpandAndLockInstance()
2864
    self.needed_locks[locking.LEVEL_NODE] = []
2865
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2866

    
2867
  def DeclareLocks(self, level):
2868
    if level == locking.LEVEL_NODE:
2869
      self._LockInstancesNodes()
2870

    
2871
  def CheckPrereq(self):
2872
    """Check prerequisites.
2873

2874
    This checks that the instance is in the cluster.
2875

2876
    """
2877
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2878
    assert self.instance is not None, \
2879
      "Cannot retrieve locked instance %s" % self.op.instance_name
2880

    
2881
  def Exec(self, feedback_fn):
2882
    """Deactivate the disks
2883

2884
    """
2885
    instance = self.instance
2886
    _SafeShutdownInstanceDisks(self, instance)
2887

    
2888

    
2889
def _SafeShutdownInstanceDisks(lu, instance):
2890
  """Shutdown block devices of an instance.
2891

2892
  This function checks if an instance is running, before calling
2893
  _ShutdownInstanceDisks.
2894

2895
  """
2896
  pnode = instance.primary_node
2897
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
2898
  ins_l.Raise("Can't contact node %s" % pnode)
2899

    
2900
  if instance.name in ins_l.payload:
2901
    raise errors.OpExecError("Instance is running, can't shutdown"
2902
                             " block devices.")
2903

    
2904
  _ShutdownInstanceDisks(lu, instance)
2905

    
2906

    
2907
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2908
  """Shutdown block devices of an instance.
2909

2910
  This does the shutdown on all nodes of the instance.
2911

2912
  If the ignore_primary is false, errors on the primary node are
2913
  ignored.
2914

2915
  """
2916
  all_result = True
2917
  for disk in instance.disks:
2918
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2919
      lu.cfg.SetDiskID(top_disk, node)
2920
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
2921
      msg = result.fail_msg
2922
      if msg:
2923
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
2924
                      disk.iv_name, node, msg)
2925
        if not ignore_primary or node != instance.primary_node:
2926
          all_result = False
2927
  return all_result
2928

    
2929

    
2930
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2931
  """Checks if a node has enough free memory.
2932

2933
  This function check if a given node has the needed amount of free
2934
  memory. In case the node has less memory or we cannot get the
2935
  information from the node, this function raise an OpPrereqError
2936
  exception.
2937

2938
  @type lu: C{LogicalUnit}
2939
  @param lu: a logical unit from which we get configuration data
2940
  @type node: C{str}
2941
  @param node: the node to check
2942
  @type reason: C{str}
2943
  @param reason: string to use in the error message
2944
  @type requested: C{int}
2945
  @param requested: the amount of memory in MiB to check for
2946
  @type hypervisor_name: C{str}
2947
  @param hypervisor_name: the hypervisor to ask for memory stats
2948
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2949
      we cannot check the node
2950

2951
  """
2952
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
2953
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
2954
  free_mem = nodeinfo[node].payload.get('memory_free', None)
2955
  if not isinstance(free_mem, int):
2956
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2957
                               " was '%s'" % (node, free_mem))
2958
  if requested > free_mem:
2959
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2960
                               " needed %s MiB, available %s MiB" %
2961
                               (node, reason, requested, free_mem))
2962

    
2963

    
2964
class LUStartupInstance(LogicalUnit):
2965
  """Starts an instance.
2966

2967
  """
2968
  HPATH = "instance-start"
2969
  HTYPE = constants.HTYPE_INSTANCE
2970
  _OP_REQP = ["instance_name", "force"]
2971
  REQ_BGL = False
2972

    
2973
  def ExpandNames(self):
2974
    self._ExpandAndLockInstance()
2975

    
2976
  def BuildHooksEnv(self):
2977
    """Build hooks env.
2978

2979
    This runs on master, primary and secondary nodes of the instance.
2980

2981
    """
2982
    env = {
2983
      "FORCE": self.op.force,
2984
      }
2985
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2986
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2987
    return env, nl, nl
2988

    
2989
  def CheckPrereq(self):
2990
    """Check prerequisites.
2991

2992
    This checks that the instance is in the cluster.
2993

2994
    """
2995
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2996
    assert self.instance is not None, \
2997
      "Cannot retrieve locked instance %s" % self.op.instance_name
2998

    
2999
    # extra beparams
3000
    self.beparams = getattr(self.op, "beparams", {})
3001
    if self.beparams:
3002
      if not isinstance(self.beparams, dict):
3003
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3004
                                   " dict" % (type(self.beparams), ))
3005
      # fill the beparams dict
3006
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3007
      self.op.beparams = self.beparams
3008

    
3009
    # extra hvparams
3010
    self.hvparams = getattr(self.op, "hvparams", {})
3011
    if self.hvparams:
3012
      if not isinstance(self.hvparams, dict):
3013
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3014
                                   " dict" % (type(self.hvparams), ))
3015

    
3016
      # check hypervisor parameter syntax (locally)
3017
      cluster = self.cfg.GetClusterInfo()
3018
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3019
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3020
                                    instance.hvparams)
3021
      filled_hvp.update(self.hvparams)
3022
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3023
      hv_type.CheckParameterSyntax(filled_hvp)
3024
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3025
      self.op.hvparams = self.hvparams
3026

    
3027
    _CheckNodeOnline(self, instance.primary_node)
3028

    
3029
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3030
    # check bridges existence
3031
    _CheckInstanceBridgesExist(self, instance)
3032

    
3033
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3034
                                              instance.name,
3035
                                              instance.hypervisor)
3036
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3037
                      prereq=True)
3038
    if not remote_info.payload: # not running already
3039
      _CheckNodeFreeMemory(self, instance.primary_node,
3040
                           "starting instance %s" % instance.name,
3041
                           bep[constants.BE_MEMORY], instance.hypervisor)
3042

    
3043
  def Exec(self, feedback_fn):
3044
    """Start the instance.
3045

3046
    """
3047
    instance = self.instance
3048
    force = self.op.force
3049

    
3050
    self.cfg.MarkInstanceUp(instance.name)
3051

    
3052
    node_current = instance.primary_node
3053

    
3054
    _StartInstanceDisks(self, instance, force)
3055

    
3056
    result = self.rpc.call_instance_start(node_current, instance,
3057
                                          self.hvparams, self.beparams)
3058
    msg = result.fail_msg
3059
    if msg:
3060
      _ShutdownInstanceDisks(self, instance)
3061
      raise errors.OpExecError("Could not start instance: %s" % msg)
3062

    
3063

    
3064
class LURebootInstance(LogicalUnit):
3065
  """Reboot an instance.
3066

3067
  """
3068
  HPATH = "instance-reboot"
3069
  HTYPE = constants.HTYPE_INSTANCE
3070
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3071
  REQ_BGL = False
3072

    
3073
  def ExpandNames(self):
3074
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3075
                                   constants.INSTANCE_REBOOT_HARD,
3076
                                   constants.INSTANCE_REBOOT_FULL]:
3077
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3078
                                  (constants.INSTANCE_REBOOT_SOFT,
3079
                                   constants.INSTANCE_REBOOT_HARD,
3080
                                   constants.INSTANCE_REBOOT_FULL))
3081
    self._ExpandAndLockInstance()
3082

    
3083
  def BuildHooksEnv(self):
3084
    """Build hooks env.
3085

3086
    This runs on master, primary and secondary nodes of the instance.
3087

3088
    """
3089
    env = {
3090
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3091
      "REBOOT_TYPE": self.op.reboot_type,
3092
      }
3093
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3094
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3095
    return env, nl, nl
3096

    
3097
  def CheckPrereq(self):
3098
    """Check prerequisites.
3099

3100
    This checks that the instance is in the cluster.
3101

3102
    """
3103
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3104
    assert self.instance is not None, \
3105
      "Cannot retrieve locked instance %s" % self.op.instance_name
3106

    
3107
    _CheckNodeOnline(self, instance.primary_node)
3108

    
3109
    # check bridges existence
3110
    _CheckInstanceBridgesExist(self, instance)
3111

    
3112
  def Exec(self, feedback_fn):
3113
    """Reboot the instance.
3114

3115
    """
3116
    instance = self.instance
3117
    ignore_secondaries = self.op.ignore_secondaries
3118
    reboot_type = self.op.reboot_type
3119

    
3120
    node_current = instance.primary_node
3121

    
3122
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3123
                       constants.INSTANCE_REBOOT_HARD]:
3124
      for disk in instance.disks:
3125
        self.cfg.SetDiskID(disk, node_current)
3126
      result = self.rpc.call_instance_reboot(node_current, instance,
3127
                                             reboot_type)
3128
      result.Raise("Could not reboot instance")
3129
    else:
3130
      result = self.rpc.call_instance_shutdown(node_current, instance)
3131
      result.Raise("Could not shutdown instance for full reboot")
3132
      _ShutdownInstanceDisks(self, instance)
3133
      _StartInstanceDisks(self, instance, ignore_secondaries)
3134
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3135
      msg = result.fail_msg
3136
      if msg:
3137
        _ShutdownInstanceDisks(self, instance)
3138
        raise errors.OpExecError("Could not start instance for"
3139
                                 " full reboot: %s" % msg)
3140

    
3141
    self.cfg.MarkInstanceUp(instance.name)
3142

    
3143

    
3144
class LUShutdownInstance(LogicalUnit):
3145
  """Shutdown an instance.
3146

3147
  """
3148
  HPATH = "instance-stop"
3149
  HTYPE = constants.HTYPE_INSTANCE
3150
  _OP_REQP = ["instance_name"]
3151
  REQ_BGL = False
3152

    
3153
  def ExpandNames(self):
3154
    self._ExpandAndLockInstance()
3155

    
3156
  def BuildHooksEnv(self):
3157
    """Build hooks env.
3158

3159
    This runs on master, primary and secondary nodes of the instance.
3160

3161
    """
3162
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3163
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3164
    return env, nl, nl
3165

    
3166
  def CheckPrereq(self):
3167
    """Check prerequisites.
3168

3169
    This checks that the instance is in the cluster.
3170

3171
    """
3172
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3173
    assert self.instance is not None, \
3174
      "Cannot retrieve locked instance %s" % self.op.instance_name
3175
    _CheckNodeOnline(self, self.instance.primary_node)
3176

    
3177
  def Exec(self, feedback_fn):
3178
    """Shutdown the instance.
3179

3180
    """
3181
    instance = self.instance
3182
    node_current = instance.primary_node
3183
    self.cfg.MarkInstanceDown(instance.name)
3184
    result = self.rpc.call_instance_shutdown(node_current, instance)
3185
    msg = result.fail_msg
3186
    if msg:
3187
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3188

    
3189
    _ShutdownInstanceDisks(self, instance)
3190

    
3191

    
3192
class LUReinstallInstance(LogicalUnit):
3193
  """Reinstall an instance.
3194

3195
  """
3196
  HPATH = "instance-reinstall"
3197
  HTYPE = constants.HTYPE_INSTANCE
3198
  _OP_REQP = ["instance_name"]
3199
  REQ_BGL = False
3200

    
3201
  def ExpandNames(self):
3202
    self._ExpandAndLockInstance()
3203

    
3204
  def BuildHooksEnv(self):
3205
    """Build hooks env.
3206

3207
    This runs on master, primary and secondary nodes of the instance.
3208

3209
    """
3210
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3211
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3212
    return env, nl, nl
3213

    
3214
  def CheckPrereq(self):
3215
    """Check prerequisites.
3216

3217
    This checks that the instance is in the cluster and is not running.
3218

3219
    """
3220
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3221
    assert instance is not None, \
3222
      "Cannot retrieve locked instance %s" % self.op.instance_name
3223
    _CheckNodeOnline(self, instance.primary_node)
3224

    
3225
    if instance.disk_template == constants.DT_DISKLESS:
3226
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3227
                                 self.op.instance_name)
3228
    if instance.admin_up:
3229
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3230
                                 self.op.instance_name)
3231
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3232
                                              instance.name,
3233
                                              instance.hypervisor)
3234
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3235
                      prereq=True)
3236
    if remote_info.payload:
3237
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3238
                                 (self.op.instance_name,
3239
                                  instance.primary_node))
3240

    
3241
    self.op.os_type = getattr(self.op, "os_type", None)
3242
    if self.op.os_type is not None:
3243
      # OS verification
3244
      pnode = self.cfg.GetNodeInfo(
3245
        self.cfg.ExpandNodeName(instance.primary_node))
3246
      if pnode is None:
3247
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3248
                                   self.op.pnode)
3249
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3250
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3251
                   (self.op.os_type, pnode.name), prereq=True)
3252

    
3253
    self.instance = instance
3254

    
3255
  def Exec(self, feedback_fn):
3256
    """Reinstall the instance.
3257

3258
    """
3259
    inst = self.instance
3260

    
3261
    if self.op.os_type is not None:
3262
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3263
      inst.os = self.op.os_type
3264
      self.cfg.Update(inst)
3265

    
3266
    _StartInstanceDisks(self, inst, None)
3267
    try:
3268
      feedback_fn("Running the instance OS create scripts...")
3269
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3270
      result.Raise("Could not install OS for instance %s on node %s" %
3271
                   (inst.name, inst.primary_node))
3272
    finally:
3273
      _ShutdownInstanceDisks(self, inst)
3274

    
3275

    
3276
class LURenameInstance(LogicalUnit):
3277
  """Rename an instance.
3278

3279
  """
3280
  HPATH = "instance-rename"
3281
  HTYPE = constants.HTYPE_INSTANCE
3282
  _OP_REQP = ["instance_name", "new_name"]
3283

    
3284
  def BuildHooksEnv(self):
3285
    """Build hooks env.
3286

3287
    This runs on master, primary and secondary nodes of the instance.
3288

3289
    """
3290
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3291
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3292
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3293
    return env, nl, nl
3294

    
3295
  def CheckPrereq(self):
3296
    """Check prerequisites.
3297

3298
    This checks that the instance is in the cluster and is not running.
3299

3300
    """
3301
    instance = self.cfg.GetInstanceInfo(
3302
      self.cfg.ExpandInstanceName(self.op.instance_name))
3303
    if instance is None:
3304
      raise errors.OpPrereqError("Instance '%s' not known" %
3305
                                 self.op.instance_name)
3306
    _CheckNodeOnline(self, instance.primary_node)
3307

    
3308
    if instance.admin_up:
3309
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3310
                                 self.op.instance_name)
3311
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3312
                                              instance.name,
3313
                                              instance.hypervisor)
3314
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3315
                      prereq=True)
3316
    if remote_info.payload:
3317
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3318
                                 (self.op.instance_name,
3319
                                  instance.primary_node))
3320
    self.instance = instance
3321

    
3322
    # new name verification
3323
    name_info = utils.HostInfo(self.op.new_name)
3324

    
3325
    self.op.new_name = new_name = name_info.name
3326
    instance_list = self.cfg.GetInstanceList()
3327
    if new_name in instance_list:
3328
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3329
                                 new_name)
3330

    
3331
    if not getattr(self.op, "ignore_ip", False):
3332
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3333
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3334
                                   (name_info.ip, new_name))
3335

    
3336

    
3337
  def Exec(self, feedback_fn):
3338
    """Reinstall the instance.
3339

3340
    """
3341
    inst = self.instance
3342
    old_name = inst.name
3343

    
3344
    if inst.disk_template == constants.DT_FILE:
3345
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3346

    
3347
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3348
    # Change the instance lock. This is definitely safe while we hold the BGL
3349
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3350
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3351

    
3352
    # re-read the instance from the configuration after rename
3353
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3354

    
3355
    if inst.disk_template == constants.DT_FILE:
3356
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3357
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3358
                                                     old_file_storage_dir,
3359
                                                     new_file_storage_dir)
3360
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3361
                   " (but the instance has been renamed in Ganeti)" %
3362
                   (inst.primary_node, old_file_storage_dir,
3363
                    new_file_storage_dir))
3364

    
3365
    _StartInstanceDisks(self, inst, None)
3366
    try:
3367
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3368
                                                 old_name)
3369
      msg = result.fail_msg
3370
      if msg:
3371
        msg = ("Could not run OS rename script for instance %s on node %s"
3372
               " (but the instance has been renamed in Ganeti): %s" %
3373
               (inst.name, inst.primary_node, msg))
3374
        self.proc.LogWarning(msg)
3375
    finally:
3376
      _ShutdownInstanceDisks(self, inst)
3377

    
3378

    
3379
class LURemoveInstance(LogicalUnit):
3380
  """Remove an instance.
3381

3382
  """
3383
  HPATH = "instance-remove"
3384
  HTYPE = constants.HTYPE_INSTANCE
3385
  _OP_REQP = ["instance_name", "ignore_failures"]
3386
  REQ_BGL = False
3387

    
3388
  def ExpandNames(self):
3389
    self._ExpandAndLockInstance()
3390
    self.needed_locks[locking.LEVEL_NODE] = []
3391
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3392

    
3393
  def DeclareLocks(self, level):
3394
    if level == locking.LEVEL_NODE:
3395
      self._LockInstancesNodes()
3396

    
3397
  def BuildHooksEnv(self):
3398
    """Build hooks env.
3399

3400
    This runs on master, primary and secondary nodes of the instance.
3401

3402
    """
3403
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3404
    nl = [self.cfg.GetMasterNode()]
3405
    return env, nl, nl
3406

    
3407
  def CheckPrereq(self):
3408
    """Check prerequisites.
3409

3410
    This checks that the instance is in the cluster.
3411

3412
    """
3413
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3414
    assert self.instance is not None, \
3415
      "Cannot retrieve locked instance %s" % self.op.instance_name
3416

    
3417
  def Exec(self, feedback_fn):
3418
    """Remove the instance.
3419

3420
    """
3421
    instance = self.instance
3422
    logging.info("Shutting down instance %s on node %s",
3423
                 instance.name, instance.primary_node)
3424

    
3425
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3426
    msg = result.fail_msg
3427
    if msg:
3428
      if self.op.ignore_failures:
3429
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3430
      else:
3431
        raise errors.OpExecError("Could not shutdown instance %s on"
3432
                                 " node %s: %s" %
3433
                                 (instance.name, instance.primary_node, msg))
3434

    
3435
    logging.info("Removing block devices for instance %s", instance.name)
3436

    
3437
    if not _RemoveDisks(self, instance):
3438
      if self.op.ignore_failures:
3439
        feedback_fn("Warning: can't remove instance's disks")
3440
      else:
3441
        raise errors.OpExecError("Can't remove instance's disks")
3442

    
3443
    logging.info("Removing instance %s out of cluster config", instance.name)
3444

    
3445
    self.cfg.RemoveInstance(instance.name)
3446
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3447

    
3448

    
3449
class LUQueryInstances(NoHooksLU):
3450
  """Logical unit for querying instances.
3451

3452
  """
3453
  _OP_REQP = ["output_fields", "names", "use_locking"]
3454
  REQ_BGL = False
3455
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3456
                                    "admin_state",
3457
                                    "disk_template", "ip", "mac", "bridge",
3458
                                    "nic_mode", "nic_link",
3459
                                    "sda_size", "sdb_size", "vcpus", "tags",
3460
                                    "network_port", "beparams",
3461
                                    r"(disk)\.(size)/([0-9]+)",
3462
                                    r"(disk)\.(sizes)", "disk_usage",
3463
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3464
                                    r"(nic)\.(bridge)/([0-9]+)",
3465
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
3466
                                    r"(disk|nic)\.(count)",
3467
                                    "serial_no", "hypervisor", "hvparams",] +
3468
                                  ["hv/%s" % name
3469
                                   for name in constants.HVS_PARAMETERS] +
3470
                                  ["be/%s" % name
3471
                                   for name in constants.BES_PARAMETERS])
3472
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3473

    
3474

    
3475
  def ExpandNames(self):
3476
    _CheckOutputFields(static=self._FIELDS_STATIC,
3477
                       dynamic=self._FIELDS_DYNAMIC,
3478
                       selected=self.op.output_fields)
3479

    
3480
    self.needed_locks = {}
3481
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3482
    self.share_locks[locking.LEVEL_NODE] = 1
3483

    
3484
    if self.op.names:
3485
      self.wanted = _GetWantedInstances(self, self.op.names)
3486
    else:
3487
      self.wanted = locking.ALL_SET
3488

    
3489
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3490
    self.do_locking = self.do_node_query and self.op.use_locking
3491
    if self.do_locking:
3492
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3493
      self.needed_locks[locking.LEVEL_NODE] = []
3494
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3495

    
3496
  def DeclareLocks(self, level):
3497
    if level == locking.LEVEL_NODE and self.do_locking:
3498
      self._LockInstancesNodes()
3499

    
3500
  def CheckPrereq(self):
3501
    """Check prerequisites.
3502

3503
    """
3504
    pass
3505

    
3506
  def Exec(self, feedback_fn):
3507
    """Computes the list of nodes and their attributes.
3508

3509
    """
3510
    all_info = self.cfg.GetAllInstancesInfo()
3511
    if self.wanted == locking.ALL_SET:
3512
      # caller didn't specify instance names, so ordering is not important
3513
      if self.do_locking:
3514
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3515
      else:
3516
        instance_names = all_info.keys()
3517
      instance_names = utils.NiceSort(instance_names)
3518
    else:
3519
      # caller did specify names, so we must keep the ordering
3520
      if self.do_locking:
3521
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3522
      else:
3523
        tgt_set = all_info.keys()
3524
      missing = set(self.wanted).difference(tgt_set)
3525
      if missing:
3526
        raise errors.OpExecError("Some instances were removed before"
3527
                                 " retrieving their data: %s" % missing)
3528
      instance_names = self.wanted
3529

    
3530
    instance_list = [all_info[iname] for iname in instance_names]
3531

    
3532
    # begin data gathering
3533

    
3534
    nodes = frozenset([inst.primary_node for inst in instance_list])
3535
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3536

    
3537
    bad_nodes = []
3538
    off_nodes = []
3539
    if self.do_node_query:
3540
      live_data = {}
3541
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3542
      for name in nodes:
3543
        result = node_data[name]
3544
        if result.offline:
3545
          # offline nodes will be in both lists
3546
          off_nodes.append(name)
3547
        if result.failed or result.fail_msg:
3548
          bad_nodes.append(name)
3549
        else:
3550
          if result.payload:
3551
            live_data.update(result.payload)
3552
          # else no instance is alive
3553
    else:
3554
      live_data = dict([(name, {}) for name in instance_names])
3555

    
3556
    # end data gathering
3557

    
3558
    HVPREFIX = "hv/"
3559
    BEPREFIX = "be/"
3560
    output = []
3561
    cluster = self.cfg.GetClusterInfo()
3562
    for instance in instance_list:
3563
      iout = []
3564
      i_hv = cluster.FillHV(instance)
3565
      i_be = cluster.FillBE(instance)
3566
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
3567
                                 nic.nicparams) for nic in instance.nics]
3568
      for field in self.op.output_fields:
3569
        st_match = self._FIELDS_STATIC.Matches(field)
3570
        if field == "name":
3571
          val = instance.name
3572
        elif field == "os":
3573
          val = instance.os
3574
        elif field == "pnode":
3575
          val = instance.primary_node
3576
        elif field == "snodes":
3577
          val = list(instance.secondary_nodes)
3578
        elif field == "admin_state":
3579
          val = instance.admin_up
3580
        elif field == "oper_state":
3581
          if instance.primary_node in bad_nodes:
3582
            val = None
3583
          else:
3584
            val = bool(live_data.get(instance.name))
3585
        elif field == "status":
3586
          if instance.primary_node in off_nodes:
3587
            val = "ERROR_nodeoffline"
3588
          elif instance.primary_node in bad_nodes:
3589
            val = "ERROR_nodedown"
3590
          else:
3591
            running = bool(live_data.get(instance.name))
3592
            if running:
3593
              if instance.admin_up:
3594
                val = "running"
3595
              else:
3596
                val = "ERROR_up"
3597
            else:
3598
              if instance.admin_up:
3599
                val = "ERROR_down"
3600
              else:
3601
                val = "ADMIN_down"
3602
        elif field == "oper_ram":
3603
          if instance.primary_node in bad_nodes:
3604
            val = None
3605
          elif instance.name in live_data:
3606
            val = live_data[instance.name].get("memory", "?")
3607
          else:
3608
            val = "-"
3609
        elif field == "vcpus":
3610
          val = i_be[constants.BE_VCPUS]
3611
        elif field == "disk_template":
3612
          val = instance.disk_template
3613
        elif field == "ip":
3614
          if instance.nics:
3615
            val = instance.nics[0].ip
3616
          else:
3617
            val = None
3618
        elif field == "nic_mode":
3619
          if instance.nics:
3620
            val = i_nicp[0][constants.NIC_MODE]
3621
          else:
3622
            val = None
3623
        elif field == "nic_link":
3624
          if instance.nics:
3625
            val = i_nicp[0][constants.NIC_LINK]
3626
          else:
3627
            val = None
3628
        elif field == "bridge":
3629
          if (instance.nics and
3630
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
3631
            val = i_nicp[0][constants.NIC_LINK]
3632
          else:
3633
            val = None
3634
        elif field == "mac":
3635
          if instance.nics:
3636
            val = instance.nics[0].mac
3637
          else:
3638
            val = None
3639
        elif field == "sda_size" or field == "sdb_size":
3640
          idx = ord(field[2]) - ord('a')
3641
          try:
3642
            val = instance.FindDisk(idx).size
3643
          except errors.OpPrereqError:
3644
            val = None
3645
        elif field == "disk_usage": # total disk usage per node
3646
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
3647
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3648
        elif field == "tags":
3649
          val = list(instance.GetTags())
3650
        elif field == "serial_no":
3651
          val = instance.serial_no
3652
        elif field == "network_port":
3653
          val = instance.network_port
3654
        elif field == "hypervisor":
3655
          val = instance.hypervisor
3656
        elif field == "hvparams":
3657
          val = i_hv
3658
        elif (field.startswith(HVPREFIX) and
3659
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3660
          val = i_hv.get(field[len(HVPREFIX):], None)
3661
        elif field == "beparams":
3662
          val = i_be
3663
        elif (field.startswith(BEPREFIX) and
3664
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3665
          val = i_be.get(field[len(BEPREFIX):], None)
3666
        elif st_match and st_match.groups():
3667
          # matches a variable list
3668
          st_groups = st_match.groups()
3669
          if st_groups and st_groups[0] == "disk":
3670
            if st_groups[1] == "count":
3671
              val = len(instance.disks)
3672
            elif st_groups[1] == "sizes":
3673
              val = [disk.size for disk in instance.disks]
3674
            elif st_groups[1] == "size":
3675
              try:
3676
                val = instance.FindDisk(st_groups[2]).size
3677
              except errors.OpPrereqError:
3678
                val = None
3679
            else:
3680
              assert False, "Unhandled disk parameter"
3681
          elif st_groups[0] == "nic":
3682
            if st_groups[1] == "count":
3683
              val = len(instance.nics)
3684
            elif st_groups[1] == "macs":
3685
              val = [nic.mac for nic in instance.nics]
3686
            elif st_groups[1] == "ips":
3687
              val = [nic.ip for nic in instance.nics]
3688
            elif st_groups[1] == "modes":
3689
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
3690
            elif st_groups[1] == "links":
3691
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
3692
            elif st_groups[1] == "bridges":
3693
              val = []
3694
              for nicp in i_nicp:
3695
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3696
                  val.append(nicp[constants.NIC_LINK])
3697
                else:
3698
                  val.append(None)
3699
            else:
3700
              # index-based item
3701
              nic_idx = int(st_groups[2])
3702
              if nic_idx >= len(instance.nics):
3703
                val = None
3704
              else:
3705
                if st_groups[1] == "mac":
3706
                  val = instance.nics[nic_idx].mac
3707
                elif st_groups[1] == "ip":
3708
                  val = instance.nics[nic_idx].ip
3709
                elif st_groups[1] == "mode":
3710
                  val = i_nicp[nic_idx][constants.NIC_MODE]
3711
                elif st_groups[1] == "link":
3712
                  val = i_nicp[nic_idx][constants.NIC_LINK]
3713
                elif st_groups[1] == "bridge":
3714
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
3715
                  if nic_mode == constants.NIC_MODE_BRIDGED:
3716
                    val = i_nicp[nic_idx][constants.NIC_LINK]
3717
                  else:
3718
                    val = None
3719
                else:
3720
                  assert False, "Unhandled NIC parameter"
3721
          else:
3722
            assert False, ("Declared but unhandled variable parameter '%s'" %
3723
                           field)
3724
        else:
3725
          assert False, "Declared but unhandled parameter '%s'" % field
3726
        iout.append(val)
3727
      output.append(iout)
3728

    
3729
    return output
3730

    
3731

    
3732
class LUFailoverInstance(LogicalUnit):
3733
  """Failover an instance.
3734

3735
  """
3736
  HPATH = "instance-failover"
3737
  HTYPE = constants.HTYPE_INSTANCE
3738
  _OP_REQP = ["instance_name", "ignore_consistency"]
3739
  REQ_BGL = False
3740

    
3741
  def ExpandNames(self):
3742
    self._ExpandAndLockInstance()
3743
    self.needed_locks[locking.LEVEL_NODE] = []
3744
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3745

    
3746
  def DeclareLocks(self, level):
3747
    if level == locking.LEVEL_NODE:
3748
      self._LockInstancesNodes()
3749

    
3750
  def BuildHooksEnv(self):
3751
    """Build hooks env.
3752

3753
    This runs on master, primary and secondary nodes of the instance.
3754

3755
    """
3756
    env = {
3757
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3758
      }
3759
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3760
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3761
    return env, nl, nl
3762

    
3763
  def CheckPrereq(self):
3764
    """Check prerequisites.
3765

3766
    This checks that the instance is in the cluster.
3767

3768
    """
3769
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3770
    assert self.instance is not None, \
3771
      "Cannot retrieve locked instance %s" % self.op.instance_name
3772

    
3773
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3774
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3775
      raise errors.OpPrereqError("Instance's disk layout is not"
3776
                                 " network mirrored, cannot failover.")
3777

    
3778
    secondary_nodes = instance.secondary_nodes
3779
    if not secondary_nodes:
3780
      raise errors.ProgrammerError("no secondary node but using "
3781
                                   "a mirrored disk template")
3782

    
3783
    target_node = secondary_nodes[0]
3784
    _CheckNodeOnline(self, target_node)
3785
    _CheckNodeNotDrained(self, target_node)
3786
    if instance.admin_up:
3787
      # check memory requirements on the secondary node
3788
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
3789
                           instance.name, bep[constants.BE_MEMORY],
3790
                           instance.hypervisor)
3791
    else:
3792
      self.LogInfo("Not checking memory on the secondary node as"
3793
                   " instance will not be started")
3794

    
3795
    # check bridge existance
3796
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3797

    
3798
  def Exec(self, feedback_fn):
3799
    """Failover an instance.
3800

3801
    The failover is done by shutting it down on its present node and
3802
    starting it on the secondary.
3803

3804
    """
3805
    instance = self.instance
3806

    
3807
    source_node = instance.primary_node
3808
    target_node = instance.secondary_nodes[0]
3809

    
3810
    feedback_fn("* checking disk consistency between source and target")
3811
    for dev in instance.disks:
3812
      # for drbd, these are drbd over lvm
3813
      if not _CheckDiskConsistency(self, dev, target_node, False):
3814
        if instance.admin_up and not self.op.ignore_consistency:
3815
          raise errors.OpExecError("Disk %s is degraded on target node,"
3816
                                   " aborting failover." % dev.iv_name)
3817

    
3818
    feedback_fn("* shutting down instance on source node")
3819
    logging.info("Shutting down instance %s on node %s",
3820
                 instance.name, source_node)
3821

    
3822
    result = self.rpc.call_instance_shutdown(source_node, instance)
3823
    msg = result.fail_msg
3824
    if msg:
3825
      if self.op.ignore_consistency:
3826
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
3827
                             " Proceeding anyway. Please make sure node"
3828
                             " %s is down. Error details: %s",
3829
                             instance.name, source_node, source_node, msg)
3830
      else:
3831
        raise errors.OpExecError("Could not shutdown instance %s on"
3832
                                 " node %s: %s" %
3833
                                 (instance.name, source_node, msg))
3834

    
3835
    feedback_fn("* deactivating the instance's disks on source node")
3836
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
3837
      raise errors.OpExecError("Can't shut down the instance's disks.")
3838

    
3839
    instance.primary_node = target_node
3840
    # distribute new instance config to the other nodes
3841
    self.cfg.Update(instance)
3842

    
3843
    # Only start the instance if it's marked as up
3844
    if instance.admin_up:
3845
      feedback_fn("* activating the instance's disks on target node")
3846
      logging.info("Starting instance %s on node %s",
3847
                   instance.name, target_node)
3848

    
3849
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
3850
                                               ignore_secondaries=True)
3851
      if not disks_ok:
3852
        _ShutdownInstanceDisks(self, instance)
3853
        raise errors.OpExecError("Can't activate the instance's disks")
3854

    
3855
      feedback_fn("* starting the instance on the target node")
3856
      result = self.rpc.call_instance_start(target_node, instance, None, None)
3857
      msg = result.fail_msg
3858
      if msg:
3859
        _ShutdownInstanceDisks(self, instance)
3860
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
3861
                                 (instance.name, target_node, msg))
3862

    
3863

    
3864
class LUMigrateInstance(LogicalUnit):
3865
  """Migrate an instance.
3866

3867
  This is migration without shutting down, compared to the failover,
3868
  which is done with shutdown.
3869

3870
  """
3871
  HPATH = "instance-migrate"
3872
  HTYPE = constants.HTYPE_INSTANCE
3873
  _OP_REQP = ["instance_name", "live", "cleanup"]
3874

    
3875
  REQ_BGL = False
3876

    
3877
  def ExpandNames(self):
3878
    self._ExpandAndLockInstance()
3879
    self.needed_locks[locking.LEVEL_NODE] = []
3880
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3881

    
3882
  def DeclareLocks(self, level):
3883
    if level == locking.LEVEL_NODE:
3884
      self._LockInstancesNodes()
3885

    
3886
  def BuildHooksEnv(self):
3887
    """Build hooks env.
3888

3889
    This runs on master, primary and secondary nodes of the instance.
3890

3891
    """
3892
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3893
    env["MIGRATE_LIVE"] = self.op.live
3894
    env["MIGRATE_CLEANUP"] = self.op.cleanup
3895
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3896
    return env, nl, nl
3897

    
3898
  def CheckPrereq(self):
3899
    """Check prerequisites.
3900

3901
    This checks that the instance is in the cluster.
3902

3903
    """
3904
    instance = self.cfg.GetInstanceInfo(
3905
      self.cfg.ExpandInstanceName(self.op.instance_name))
3906
    if instance is None:
3907
      raise errors.OpPrereqError("Instance '%s' not known" %
3908
                                 self.op.instance_name)
3909

    
3910
    if instance.disk_template != constants.DT_DRBD8:
3911
      raise errors.OpPrereqError("Instance's disk layout is not"
3912
                                 " drbd8, cannot migrate.")
3913

    
3914
    secondary_nodes = instance.secondary_nodes
3915
    if not secondary_nodes:
3916
      raise errors.ConfigurationError("No secondary node but using"
3917
                                      " drbd8 disk template")
3918

    
3919
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
3920

    
3921
    target_node = secondary_nodes[0]
3922
    # check memory requirements on the secondary node
3923
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
3924
                         instance.name, i_be[constants.BE_MEMORY],
3925
                         instance.hypervisor)
3926

    
3927
    # check bridge existance
3928
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3929

    
3930
    if not self.op.cleanup:
3931
      _CheckNodeNotDrained(self, target_node)
3932
      result = self.rpc.call_instance_migratable(instance.primary_node,
3933
                                                 instance)
3934
      result.Raise("Can't migrate, please use failover", prereq=True)
3935

    
3936
    self.instance = instance
3937

    
3938
  def _WaitUntilSync(self):
3939
    """Poll with custom rpc for disk sync.
3940

3941
    This uses our own step-based rpc call.
3942

3943
    """
3944
    self.feedback_fn("* wait until resync is done")
3945
    all_done = False
3946
    while not all_done:
3947
      all_done = True
3948
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
3949
                                            self.nodes_ip,
3950
                                            self.instance.disks)
3951
      min_percent = 100
3952
      for node, nres in result.items():
3953
        nres.Raise("Cannot resync disks on node %s" % node)
3954
        node_done, node_percent = nres.payload
3955
        all_done = all_done and node_done
3956
        if node_percent is not None:
3957
          min_percent = min(min_percent, node_percent)
3958
      if not all_done:
3959
        if min_percent < 100:
3960
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
3961
        time.sleep(2)
3962

    
3963
  def _EnsureSecondary(self, node):
3964
    """Demote a node to secondary.
3965

3966
    """
3967
    self.feedback_fn("* switching node %s to secondary mode" % node)
3968

    
3969
    for dev in self.instance.disks:
3970
      self.cfg.SetDiskID(dev, node)
3971

    
3972
    result = self.rpc.call_blockdev_close(node, self.instance.name,
3973
                                          self.instance.disks)
3974
    result.Raise("Cannot change disk to secondary on node %s" % node)
3975

    
3976
  def _GoStandalone(self):
3977
    """Disconnect from the network.
3978

3979
    """
3980
    self.feedback_fn("* changing into standalone mode")
3981
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
3982
                                               self.instance.disks)
3983
    for node, nres in result.items():
3984
      nres.Raise("Cannot disconnect disks node %s" % node)
3985

    
3986
  def _GoReconnect(self, multimaster):
3987
    """Reconnect to the network.
3988

3989
    """
3990
    if multimaster:
3991
      msg = "dual-master"
3992
    else:
3993
      msg = "single-master"
3994
    self.feedback_fn("* changing disks into %s mode" % msg)
3995
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
3996
                                           self.instance.disks,
3997
                                           self.instance.name, multimaster)
3998
    for node, nres in result.items():
3999
      nres.Raise("Cannot change disks config on node %s" % node)
4000

    
4001
  def _ExecCleanup(self):
4002
    """Try to cleanup after a failed migration.
4003

4004
    The cleanup is done by:
4005
      - check that the instance is running only on one node
4006
        (and update the config if needed)
4007
      - change disks on its secondary node to secondary
4008
      - wait until disks are fully synchronized
4009
      - disconnect from the network
4010
      - change disks into single-master mode
4011
      - wait again until disks are fully synchronized
4012

4013
    """
4014
    instance = self.instance
4015
    target_node = self.target_node
4016
    source_node = self.source_node
4017

    
4018
    # check running on only one node
4019
    self.feedback_fn("* checking where the instance actually runs"
4020
                     " (if this hangs, the hypervisor might be in"
4021
                     " a bad state)")
4022
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4023
    for node, result in ins_l.items():
4024
      result.Raise("Can't contact node %s" % node)
4025

    
4026
    runningon_source = instance.name in ins_l[source_node].payload
4027
    runningon_target = instance.name in ins_l[target_node].payload
4028

    
4029
    if runningon_source and runningon_target:
4030
      raise errors.OpExecError("Instance seems to be running on two nodes,"
4031
                               " or the hypervisor is confused. You will have"
4032
                               " to ensure manually that it runs only on one"
4033
                               " and restart this operation.")
4034

    
4035
    if not (runningon_source or runningon_target):
4036
      raise errors.OpExecError("Instance does not seem to be running at all."
4037
                               " In this case, it's safer to repair by"
4038
                               " running 'gnt-instance stop' to ensure disk"
4039
                               " shutdown, and then restarting it.")
4040

    
4041
    if runningon_target:
4042
      # the migration has actually succeeded, we need to update the config
4043
      self.feedback_fn("* instance running on secondary node (%s),"
4044
                       " updating config" % target_node)
4045
      instance.primary_node = target_node
4046
      self.cfg.Update(instance)
4047
      demoted_node = source_node
4048
    else:
4049
      self.feedback_fn("* instance confirmed to be running on its"
4050
                       " primary node (%s)" % source_node)
4051
      demoted_node = target_node
4052

    
4053
    self._EnsureSecondary(demoted_node)
4054
    try:
4055
      self._WaitUntilSync()
4056
    except errors.OpExecError:
4057
      # we ignore here errors, since if the device is standalone, it
4058
      # won't be able to sync
4059
      pass
4060
    self._GoStandalone()
4061
    self._GoReconnect(False)
4062
    self._WaitUntilSync()
4063

    
4064
    self.feedback_fn("* done")
4065

    
4066
  def _RevertDiskStatus(self):
4067
    """Try to revert the disk status after a failed migration.
4068

4069
    """
4070
    target_node = self.target_node
4071
    try:
4072
      self._EnsureSecondary(target_node)
4073
      self._GoStandalone()
4074
      self._GoReconnect(False)
4075
      self._WaitUntilSync()
4076
    except errors.OpExecError, err:
4077
      self.LogWarning("Migration failed and I can't reconnect the"
4078
                      " drives: error '%s'\n"
4079
                      "Please look and recover the instance status" %
4080
                      str(err))
4081

    
4082
  def _AbortMigration(self):
4083
    """Call the hypervisor code to abort a started migration.
4084

4085
    """
4086
    instance = self.instance
4087
    target_node = self.target_node
4088
    migration_info = self.migration_info
4089

    
4090
    abort_result = self.rpc.call_finalize_migration(target_node,
4091
                                                    instance,
4092
                                                    migration_info,
4093
                                                    False)
4094
    abort_msg = abort_result.fail_msg
4095
    if abort_msg:
4096
      logging.error("Aborting migration failed on target node %s: %s" %
4097
                    (target_node, abort_msg))
4098
      # Don't raise an exception here, as we stil have to try to revert the
4099
      # disk status, even if this step failed.
4100

    
4101
  def _ExecMigration(self):
4102
    """Migrate an instance.
4103

4104
    The migrate is done by:
4105
      - change the disks into dual-master mode
4106
      - wait until disks are fully synchronized again
4107
      - migrate the instance
4108
      - change disks on the new secondary node (the old primary) to secondary
4109
      - wait until disks are fully synchronized
4110
      - change disks into single-master mode
4111

4112
    """
4113
    instance = self.instance
4114
    target_node = self.target_node
4115
    source_node = self.source_node
4116

    
4117
    self.feedback_fn("* checking disk consistency between source and target")
4118
    for dev in instance.disks:
4119
      if not _CheckDiskConsistency(self, dev, target_node, False):
4120
        raise errors.OpExecError("Disk %s is degraded or not fully"
4121
                                 " synchronized on target node,"
4122
                                 " aborting migrate." % dev.iv_name)
4123

    
4124
    # First get the migration information from the remote node
4125
    result = self.rpc.call_migration_info(source_node, instance)
4126
    msg = result.fail_msg
4127
    if msg:
4128
      log_err = ("Failed fetching source migration information from %s: %s" %
4129
                 (source_node, msg))
4130
      logging.error(log_err)
4131
      raise errors.OpExecError(log_err)
4132

    
4133
    self.migration_info = migration_info = result.payload
4134

    
4135
    # Then switch the disks to master/master mode
4136
    self._EnsureSecondary(target_node)
4137
    self._GoStandalone()
4138
    self._GoReconnect(True)
4139
    self._WaitUntilSync()
4140

    
4141
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4142
    result = self.rpc.call_accept_instance(target_node,
4143
                                           instance,
4144
                                           migration_info,
4145
                                           self.nodes_ip[target_node])
4146

    
4147
    msg = result.fail_msg
4148
    if msg:
4149
      logging.error("Instance pre-migration failed, trying to revert"
4150
                    " disk status: %s", msg)
4151
      self._AbortMigration()
4152
      self._RevertDiskStatus()
4153
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4154
                               (instance.name, msg))
4155

    
4156
    self.feedback_fn("* migrating instance to %s" % target_node)
4157
    time.sleep(10)
4158
    result = self.rpc.call_instance_migrate(source_node, instance,
4159
                                            self.nodes_ip[target_node],
4160
                                            self.op.live)
4161
    msg = result.fail_msg
4162
    if msg:
4163
      logging.error("Instance migration failed, trying to revert"
4164
                    " disk status: %s", msg)
4165
      self._AbortMigration()
4166
      self._RevertDiskStatus()
4167
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4168
                               (instance.name, msg))
4169
    time.sleep(10)
4170

    
4171
    instance.primary_node = target_node
4172
    # distribute new instance config to the other nodes
4173
    self.cfg.Update(instance)
4174

    
4175
    result = self.rpc.call_finalize_migration(target_node,
4176
                                              instance,
4177
                                              migration_info,
4178
                                              True)
4179
    msg = result.fail_msg
4180
    if msg:
4181
      logging.error("Instance migration succeeded, but finalization failed:"
4182
                    " %s" % msg)
4183
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4184
                               msg)
4185

    
4186
    self._EnsureSecondary(source_node)
4187
    self._WaitUntilSync()
4188
    self._GoStandalone()
4189
    self._GoReconnect(False)
4190
    self._WaitUntilSync()
4191

    
4192
    self.feedback_fn("* done")
4193

    
4194
  def Exec(self, feedback_fn):
4195
    """Perform the migration.
4196

4197
    """
4198
    self.feedback_fn = feedback_fn
4199

    
4200
    self.source_node = self.instance.primary_node
4201
    self.target_node = self.instance.secondary_nodes[0]
4202
    self.all_nodes = [self.source_node, self.target_node]
4203
    self.nodes_ip = {
4204
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4205
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4206
      }
4207
    if self.op.cleanup:
4208
      return self._ExecCleanup()
4209
    else:
4210
      return self._ExecMigration()
4211

    
4212

    
4213
def _CreateBlockDev(lu, node, instance, device, force_create,
4214
                    info, force_open):
4215
  """Create a tree of block devices on a given node.
4216

4217
  If this device type has to be created on secondaries, create it and
4218
  all its children.
4219

4220
  If not, just recurse to children keeping the same 'force' value.
4221

4222
  @param lu: the lu on whose behalf we execute
4223
  @param node: the node on which to create the device
4224
  @type instance: L{objects.Instance}
4225
  @param instance: the instance which owns the device
4226
  @type device: L{objects.Disk}
4227
  @param device: the device to create
4228
  @type force_create: boolean
4229
  @param force_create: whether to force creation of this device; this
4230
      will be change to True whenever we find a device which has
4231
      CreateOnSecondary() attribute
4232
  @param info: the extra 'metadata' we should attach to the device
4233
      (this will be represented as a LVM tag)
4234
  @type force_open: boolean
4235
  @param force_open: this parameter will be passes to the
4236
      L{backend.BlockdevCreate} function where it specifies
4237
      whether we run on primary or not, and it affects both
4238
      the child assembly and the device own Open() execution
4239

4240
  """
4241
  if device.CreateOnSecondary():
4242
    force_create = True
4243

    
4244
  if device.children:
4245
    for child in device.children:
4246
      _CreateBlockDev(lu, node, instance, child, force_create,
4247
                      info, force_open)
4248

    
4249
  if not force_create:
4250
    return
4251

    
4252
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
4253

    
4254

    
4255
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
4256
  """Create a single block device on a given node.
4257

4258
  This will not recurse over children of the device, so they must be
4259
  created in advance.
4260

4261
  @param lu: the lu on whose behalf we execute
4262
  @param node: the node on which to create the device
4263
  @type instance: L{objects.Instance}
4264
  @param instance: the instance which owns the device
4265
  @type device: L{objects.Disk}
4266
  @param device: the device to create
4267
  @param info: the extra 'metadata' we should attach to the device
4268
      (this will be represented as a LVM tag)
4269
  @type force_open: boolean
4270
  @param force_open: this parameter will be passes to the
4271
      L{backend.BlockdevCreate} function where it specifies
4272
      whether we run on primary or not, and it affects both
4273
      the child assembly and the device own Open() execution
4274

4275
  """
4276
  lu.cfg.SetDiskID(device, node)
4277
  result = lu.rpc.call_blockdev_create(node, device, device.size,
4278
                                       instance.name, force_open, info)
4279
  result.Raise("Can't create block device %s on"
4280
               " node %s for instance %s" % (device, node, instance.name))
4281
  if device.physical_id is None:
4282
    device.physical_id = result.payload
4283

    
4284

    
4285
def _GenerateUniqueNames(lu, exts):
4286
  """Generate a suitable LV name.
4287

4288
  This will generate a logical volume name for the given instance.
4289

4290
  """
4291
  results = []
4292
  for val in exts:
4293
    new_id = lu.cfg.GenerateUniqueID()
4294
    results.append("%s%s" % (new_id, val))
4295
  return results
4296

    
4297

    
4298
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
4299
                         p_minor, s_minor):
4300
  """Generate a drbd8 device complete with its children.
4301

4302
  """
4303
  port = lu.cfg.AllocatePort()
4304
  vgname = lu.cfg.GetVGName()
4305
  shared_secret = lu.cfg.GenerateDRBDSecret()
4306
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
4307
                          logical_id=(vgname, names[0]))
4308
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
4309
                          logical_id=(vgname, names[1]))
4310
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
4311
                          logical_id=(primary, secondary, port,
4312
                                      p_minor, s_minor,
4313
                                      shared_secret),
4314
                          children=[dev_data, dev_meta],
4315
                          iv_name=iv_name)
4316
  return drbd_dev
4317

    
4318

    
4319
def _GenerateDiskTemplate(lu, template_name,
4320
                          instance_name, primary_node,
4321
                          secondary_nodes, disk_info,
4322
                          file_storage_dir, file_driver,
4323
                          base_index):
4324
  """Generate the entire disk layout for a given template type.
4325

4326
  """
4327
  #TODO: compute space requirements
4328

    
4329
  vgname = lu.cfg.GetVGName()
4330
  disk_count = len(disk_info)
4331
  disks = []
4332
  if template_name == constants.DT_DISKLESS:
4333
    pass
4334
  elif template_name == constants.DT_PLAIN:
4335
    if len(secondary_nodes) != 0:
4336
      raise errors.ProgrammerError("Wrong template configuration")
4337

    
4338
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
4339
                                      for i in range(disk_count)])
4340
    for idx, disk in enumerate(disk_info):
4341
      disk_index = idx + base_index
4342
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
4343
                              logical_id=(vgname, names[idx]),
4344
                              iv_name="disk/%d" % disk_index,
4345
                              mode=disk["mode"])
4346
      disks.append(disk_dev)
4347
  elif template_name == constants.DT_DRBD8:
4348
    if len(secondary_nodes) != 1:
4349
      raise errors.ProgrammerError("Wrong template configuration")
4350
    remote_node = secondary_nodes[0]
4351
    minors = lu.cfg.AllocateDRBDMinor(
4352
      [primary_node, remote_node] * len(disk_info), instance_name)
4353

    
4354
    names = []
4355
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
4356
                                               for i in range(disk_count)]):
4357
      names.append(lv_prefix + "_data")
4358
      names.append(lv_prefix + "_meta")
4359
    for idx, disk in enumerate(disk_info):
4360
      disk_index = idx + base_index
4361
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
4362
                                      disk["size"], names[idx*2:idx*2+2],
4363
                                      "disk/%d" % disk_index,
4364
                                      minors[idx*2], minors[idx*2+1])
4365
      disk_dev.mode = disk["mode"]
4366
      disks.append(disk_dev)
4367
  elif template_name == constants.DT_FILE:
4368
    if len(secondary_nodes) != 0:
4369
      raise errors.ProgrammerError("Wrong template configuration")
4370

    
4371
    for idx, disk in enumerate(disk_info):
4372
      disk_index = idx + base_index
4373
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
4374
                              iv_name="disk/%d" % disk_index,
4375
                              logical_id=(file_driver,
4376
                                          "%s/disk%d" % (file_storage_dir,
4377
                                                         disk_index)),
4378
                              mode=disk["mode"])
4379
      disks.append(disk_dev)
4380
  else:
4381
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
4382
  return disks
4383

    
4384

    
4385
def _GetInstanceInfoText(instance):
4386
  """Compute that text that should be added to the disk's metadata.
4387

4388
  """
4389
  return "originstname+%s" % instance.name
4390

    
4391

    
4392
def _CreateDisks(lu, instance):
4393
  """Create all disks for an instance.
4394

4395
  This abstracts away some work from AddInstance.
4396

4397
  @type lu: L{LogicalUnit}
4398
  @param lu: the logical unit on whose behalf we execute
4399
  @type instance: L{objects.Instance}
4400
  @param instance: the instance whose disks we should create
4401
  @rtype: boolean
4402
  @return: the success of the creation
4403

4404
  """
4405
  info = _GetInstanceInfoText(instance)
4406
  pnode = instance.primary_node
4407

    
4408
  if instance.disk_template == constants.DT_FILE:
4409
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4410
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
4411

    
4412
    result.Raise("Failed to create directory '%s' on"
4413
                 " node %s: %s" % (file_storage_dir, pnode))
4414

    
4415
  # Note: this needs to be kept in sync with adding of disks in
4416
  # LUSetInstanceParams
4417
  for device in instance.disks:
4418
    logging.info("Creating volume %s for instance %s",
4419
                 device.iv_name, instance.name)
4420
    #HARDCODE
4421
    for node in instance.all_nodes:
4422
      f_create = node == pnode
4423
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
4424

    
4425

    
4426
def _RemoveDisks(lu, instance):
4427
  """Remove all disks for an instance.
4428

4429
  This abstracts away some work from `AddInstance()` and
4430
  `RemoveInstance()`. Note that in case some of the devices couldn't
4431
  be removed, the removal will continue with the other ones (compare
4432
  with `_CreateDisks()`).
4433

4434
  @type lu: L{LogicalUnit}
4435
  @param lu: the logical unit on whose behalf we execute
4436
  @type instance: L{objects.Instance}
4437
  @param instance: the instance whose disks we should remove
4438
  @rtype: boolean
4439
  @return: the success of the removal
4440

4441
  """
4442
  logging.info("Removing block devices for instance %s", instance.name)
4443

    
4444
  all_result = True
4445
  for device in instance.disks:
4446
    for node, disk in device.ComputeNodeTree(instance.primary_node):
4447
      lu.cfg.SetDiskID(disk, node)
4448
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
4449
      if msg:
4450
        lu.LogWarning("Could not remove block device %s on node %s,"
4451
                      " continuing anyway: %s", device.iv_name, node, msg)
4452
        all_result = False
4453

    
4454
  if instance.disk_template == constants.DT_FILE:
4455
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4456
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
4457
                                                 file_storage_dir)
4458
    msg = result.fail_msg
4459
    if msg:
4460
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
4461
                    file_storage_dir, instance.primary_node, msg)
4462
      all_result = False
4463

    
4464
  return all_result
4465

    
4466

    
4467
def _ComputeDiskSize(disk_template, disks):
4468
  """Compute disk size requirements in the volume group
4469

4470
  """
4471
  # Required free disk space as a function of disk and swap space
4472
  req_size_dict = {
4473
    constants.DT_DISKLESS: None,
4474
    constants.DT_PLAIN: sum(d["size"] for d in disks),
4475
    # 128 MB are added for drbd metadata for each disk
4476
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
4477
    constants.DT_FILE: None,
4478
  }
4479

    
4480
  if disk_template not in req_size_dict:
4481
    raise errors.ProgrammerError("Disk template '%s' size requirement"
4482
                                 " is unknown" %  disk_template)
4483

    
4484
  return req_size_dict[disk_template]
4485

    
4486

    
4487
def _CheckHVParams(lu, nodenames, hvname, hvparams):
4488
  """Hypervisor parameter validation.
4489

4490
  This function abstract the hypervisor parameter validation to be
4491
  used in both instance create and instance modify.
4492

4493
  @type lu: L{LogicalUnit}
4494
  @param lu: the logical unit for which we check
4495
  @type nodenames: list
4496
  @param nodenames: the list of nodes on which we should check
4497
  @type hvname: string
4498
  @param hvname: the name of the hypervisor we should use
4499
  @type hvparams: dict
4500
  @param hvparams: the parameters which we need to check
4501
  @raise errors.OpPrereqError: if the parameters are not valid
4502

4503
  """
4504
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
4505
                                                  hvname,
4506
                                                  hvparams)
4507
  for node in nodenames:
4508
    info = hvinfo[node]
4509
    if info.offline:
4510
      continue
4511
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
4512

    
4513

    
4514
class LUCreateInstance(LogicalUnit):
4515
  """Create an instance.
4516

4517
  """
4518
  HPATH = "instance-add"
4519
  HTYPE = constants.HTYPE_INSTANCE
4520
  _OP_REQP = ["instance_name", "disks", "disk_template",
4521
              "mode", "start",
4522
              "wait_for_sync", "ip_check", "nics",
4523
              "hvparams", "beparams"]
4524
  REQ_BGL = False
4525

    
4526
  def _ExpandNode(self, node):
4527
    """Expands and checks one node name.
4528

4529
    """
4530
    node_full = self.cfg.ExpandNodeName(node)
4531
    if node_full is None:
4532
      raise errors.OpPrereqError("Unknown node %s" % node)
4533
    return node_full
4534

    
4535
  def ExpandNames(self):
4536
    """ExpandNames for CreateInstance.
4537

4538
    Figure out the right locks for instance creation.
4539

4540
    """
4541
    self.needed_locks = {}
4542

    
4543
    # set optional parameters to none if they don't exist
4544
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4545
      if not hasattr(self.op, attr):
4546
        setattr(self.op, attr, None)
4547

    
4548
    # cheap checks, mostly valid constants given
4549

    
4550
    # verify creation mode
4551
    if self.op.mode not in (constants.INSTANCE_CREATE,
4552
                            constants.INSTANCE_IMPORT):
4553
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4554
                                 self.op.mode)
4555

    
4556
    # disk template and mirror node verification
4557
    if self.op.disk_template not in constants.DISK_TEMPLATES:
4558
      raise errors.OpPrereqError("Invalid disk template name")
4559

    
4560
    if self.op.hypervisor is None:
4561
      self.op.hypervisor = self.cfg.GetHypervisorType()
4562

    
4563
    cluster = self.cfg.GetClusterInfo()
4564
    enabled_hvs = cluster.enabled_hypervisors
4565
    if self.op.hypervisor not in enabled_hvs:
4566
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4567
                                 " cluster (%s)" % (self.op.hypervisor,
4568
                                  ",".join(enabled_hvs)))
4569

    
4570
    # check hypervisor parameter syntax (locally)
4571
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4572
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
4573
                                  self.op.hvparams)
4574
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4575
    hv_type.CheckParameterSyntax(filled_hvp)
4576
    self.hv_full = filled_hvp
4577

    
4578
    # fill and remember the beparams dict
4579
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4580
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
4581
                                    self.op.beparams)
4582

    
4583
    #### instance parameters check
4584

    
4585
    # instance name verification
4586
    hostname1 = utils.HostInfo(self.op.instance_name)
4587
    self.op.instance_name = instance_name = hostname1.name
4588

    
4589
    # this is just a preventive check, but someone might still add this
4590
    # instance in the meantime, and creation will fail at lock-add time
4591
    if instance_name in self.cfg.GetInstanceList():
4592
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4593
                                 instance_name)
4594

    
4595
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4596

    
4597
    # NIC buildup
4598
    self.nics = []
4599
    for idx, nic in enumerate(self.op.nics):
4600
      nic_mode_req = nic.get("mode", None)
4601
      nic_mode = nic_mode_req
4602
      if nic_mode is None:
4603
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
4604

    
4605
      # in routed mode, for the first nic, the default ip is 'auto'
4606
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
4607
        default_ip_mode = constants.VALUE_AUTO
4608
      else:
4609
        default_ip_mode = constants.VALUE_NONE
4610

    
4611
      # ip validity checks
4612
      ip = nic.get("ip", default_ip_mode)
4613
      if ip is None or ip.lower() == constants.VALUE_NONE:
4614
        nic_ip = None
4615
      elif ip.lower() == constants.VALUE_AUTO:
4616
        nic_ip = hostname1.ip
4617
      else:
4618
        if not utils.IsValidIP(ip):
4619
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4620
                                     " like a valid IP" % ip)
4621
        nic_ip = ip
4622

    
4623
      # TODO: check the ip for uniqueness !!
4624
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
4625
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
4626

    
4627
      # MAC address verification
4628
      mac = nic.get("mac", constants.VALUE_AUTO)
4629
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4630
        if not utils.IsValidMac(mac.lower()):
4631
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4632
                                     mac)
4633
      # bridge verification
4634
      bridge = nic.get("bridge", None)
4635
      link = nic.get("link", None)
4636
      if bridge and link:
4637
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
4638
                                   " at the same time")
4639
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
4640
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
4641
      elif bridge:
4642
        link = bridge
4643

    
4644
      nicparams = {}
4645
      if nic_mode_req:
4646
        nicparams[constants.NIC_MODE] = nic_mode_req
4647
      if link:
4648
        nicparams[constants.NIC_LINK] = link
4649

    
4650
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4651
                                      nicparams)
4652
      objects.NIC.CheckParameterSyntax(check_params)
4653
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
4654

    
4655
    # disk checks/pre-build
4656
    self.disks = []
4657
    for disk in self.op.disks:
4658
      mode = disk.get("mode", constants.DISK_RDWR)
4659
      if mode not in constants.DISK_ACCESS_SET:
4660
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4661
                                   mode)
4662
      size = disk.get("size", None)
4663
      if size is None:
4664
        raise errors.OpPrereqError("Missing disk size")
4665
      try:
4666
        size = int(size)
4667
      except ValueError:
4668
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4669
      self.disks.append({"size": size, "mode": mode})
4670

    
4671
    # used in CheckPrereq for ip ping check
4672
    self.check_ip = hostname1.ip
4673

    
4674
    # file storage checks
4675
    if (self.op.file_driver and
4676
        not self.op.file_driver in constants.FILE_DRIVER):
4677
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
4678
                                 self.op.file_driver)
4679

    
4680
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4681
      raise errors.OpPrereqError("File storage directory path not absolute")
4682

    
4683
    ### Node/iallocator related checks
4684
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
4685
      raise errors.OpPrereqError("One and only one of iallocator and primary"
4686
                                 " node must be given")
4687

    
4688
    if self.op.iallocator:
4689
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4690
    else:
4691
      self.op.pnode = self._ExpandNode(self.op.pnode)
4692
      nodelist = [self.op.pnode]
4693
      if self.op.snode is not None:
4694
        self.op.snode = self._ExpandNode(self.op.snode)
4695
        nodelist.append(self.op.snode)
4696
      self.needed_locks[locking.LEVEL_NODE] = nodelist
4697

    
4698
    # in case of import lock the source node too
4699
    if self.op.mode == constants.INSTANCE_IMPORT:
4700
      src_node = getattr(self.op, "src_node", None)
4701
      src_path = getattr(self.op, "src_path", None)
4702

    
4703
      if src_path is None:
4704
        self.op.src_path = src_path = self.op.instance_name
4705

    
4706
      if src_node is None:
4707
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4708
        self.op.src_node = None
4709
        if os.path.isabs(src_path):
4710
          raise errors.OpPrereqError("Importing an instance from an absolute"
4711
                                     " path requires a source node option.")
4712
      else:
4713
        self.op.src_node = src_node = self._ExpandNode(src_node)
4714
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4715
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
4716
        if not os.path.isabs(src_path):
4717
          self.op.src_path = src_path = \
4718
            os.path.join(constants.EXPORT_DIR, src_path)
4719

    
4720
    else: # INSTANCE_CREATE
4721
      if getattr(self.op, "os_type", None) is None:
4722
        raise errors.OpPrereqError("No guest OS specified")
4723

    
4724
  def _RunAllocator(self):
4725
    """Run the allocator based on input opcode.
4726

4727
    """
4728
    nics = [n.ToDict() for n in self.nics]
4729
    ial = IAllocator(self.cfg, self.rpc,
4730
                     mode=constants.IALLOCATOR_MODE_ALLOC,
4731
                     name=self.op.instance_name,
4732
                     disk_template=self.op.disk_template,
4733
                     tags=[],
4734
                     os=self.op.os_type,
4735
                     vcpus=self.be_full[constants.BE_VCPUS],
4736
                     mem_size=self.be_full[constants.BE_MEMORY],
4737
                     disks=self.disks,
4738
                     nics=nics,
4739
                     hypervisor=self.op.hypervisor,
4740
                     )
4741

    
4742
    ial.Run(self.op.iallocator)
4743

    
4744
    if not ial.success:
4745
      raise errors.OpPrereqError("Can't compute nodes using"
4746
                                 " iallocator '%s': %s" % (self.op.iallocator,
4747
                                                           ial.info))
4748
    if len(ial.nodes) != ial.required_nodes:
4749
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4750
                                 " of nodes (%s), required %s" %
4751
                                 (self.op.iallocator, len(ial.nodes),
4752
                                  ial.required_nodes))
4753
    self.op.pnode = ial.nodes[0]
4754
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4755
                 self.op.instance_name, self.op.iallocator,
4756
                 ", ".join(ial.nodes))
4757
    if ial.required_nodes == 2:
4758
      self.op.snode = ial.nodes[1]
4759

    
4760
  def BuildHooksEnv(self):
4761
    """Build hooks env.
4762

4763
    This runs on master, primary and secondary nodes of the instance.
4764

4765
    """
4766
    env = {
4767
      "ADD_MODE": self.op.mode,
4768
      }
4769
    if self.op.mode == constants.INSTANCE_IMPORT:
4770
      env["SRC_NODE"] = self.op.src_node
4771
      env["SRC_PATH"] = self.op.src_path
4772
      env["SRC_IMAGES"] = self.src_images
4773

    
4774
    env.update(_BuildInstanceHookEnv(
4775
      name=self.op.instance_name,
4776
      primary_node=self.op.pnode,
4777
      secondary_nodes=self.secondaries,
4778
      status=self.op.start,
4779
      os_type=self.op.os_type,
4780
      memory=self.be_full[constants.BE_MEMORY],
4781
      vcpus=self.be_full[constants.BE_VCPUS],
4782
      nics=_NICListToTuple(self, self.nics),
4783
      disk_template=self.op.disk_template,
4784
      disks=[(d["size"], d["mode"]) for d in self.disks],
4785
      bep=self.be_full,
4786
      hvp=self.hv_full,
4787
      hypervisor_name=self.op.hypervisor,
4788
    ))
4789

    
4790
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
4791
          self.secondaries)
4792
    return env, nl, nl
4793

    
4794

    
4795
  def CheckPrereq(self):
4796
    """Check prerequisites.
4797

4798
    """
4799
    if (not self.cfg.GetVGName() and
4800
        self.op.disk_template not in constants.DTS_NOT_LVM):
4801
      raise errors.OpPrereqError("Cluster does not support lvm-based"
4802
                                 " instances")
4803

    
4804
    if self.op.mode == constants.INSTANCE_IMPORT:
4805
      src_node = self.op.src_node
4806
      src_path = self.op.src_path
4807

    
4808
      if src_node is None:
4809
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
4810
        exp_list = self.rpc.call_export_list(locked_nodes)
4811
        found = False
4812
        for node in exp_list:
4813
          if exp_list[node].fail_msg:
4814
            continue
4815
          if src_path in exp_list[node].payload:
4816
            found = True
4817
            self.op.src_node = src_node = node
4818
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
4819
                                                       src_path)
4820
            break
4821
        if not found:
4822
          raise errors.OpPrereqError("No export found for relative path %s" %
4823
                                      src_path)
4824

    
4825
      _CheckNodeOnline(self, src_node)
4826
      result = self.rpc.call_export_info(src_node, src_path)
4827
      result.Raise("No export or invalid export found in dir %s" % src_path)
4828

    
4829
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
4830
      if not export_info.has_section(constants.INISECT_EXP):
4831
        raise errors.ProgrammerError("Corrupted export config")
4832

    
4833
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
4834
      if (int(ei_version) != constants.EXPORT_VERSION):
4835
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
4836
                                   (ei_version, constants.EXPORT_VERSION))
4837

    
4838
      # Check that the new instance doesn't have less disks than the export
4839
      instance_disks = len(self.disks)
4840
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
4841
      if instance_disks < export_disks:
4842
        raise errors.OpPrereqError("Not enough disks to import."
4843
                                   " (instance: %d, export: %d)" %
4844
                                   (instance_disks, export_disks))
4845

    
4846
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
4847
      disk_images = []
4848
      for idx in range(export_disks):
4849
        option = 'disk%d_dump' % idx
4850
        if export_info.has_option(constants.INISECT_INS, option):
4851
          # FIXME: are the old os-es, disk sizes, etc. useful?
4852
          export_name = export_info.get(constants.INISECT_INS, option)
4853
          image = os.path.join(src_path, export_name)
4854
          disk_images.append(image)
4855
        else:
4856
          disk_images.append(False)
4857

    
4858
      self.src_images = disk_images
4859

    
4860
      old_name = export_info.get(constants.INISECT_INS, 'name')
4861
      # FIXME: int() here could throw a ValueError on broken exports
4862
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
4863
      if self.op.instance_name == old_name:
4864
        for idx, nic in enumerate(self.nics):
4865
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
4866
            nic_mac_ini = 'nic%d_mac' % idx
4867
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
4868

    
4869
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
4870
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
4871
    if self.op.start and not self.op.ip_check:
4872
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
4873
                                 " adding an instance in start mode")
4874

    
4875
    if self.op.ip_check:
4876
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
4877
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4878
                                   (self.check_ip, self.op.instance_name))
4879

    
4880
    #### mac address generation
4881
    # By generating here the mac address both the allocator and the hooks get
4882
    # the real final mac address rather than the 'auto' or 'generate' value.
4883
    # There is a race condition between the generation and the instance object
4884
    # creation, which means that we know the mac is valid now, but we're not
4885
    # sure it will be when we actually add the instance. If things go bad
4886
    # adding the instance will abort because of a duplicate mac, and the
4887
    # creation job will fail.
4888
    for nic in self.nics:
4889
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4890
        nic.mac = self.cfg.GenerateMAC()
4891

    
4892
    #### allocator run
4893

    
4894
    if self.op.iallocator is not None:
4895
      self._RunAllocator()
4896

    
4897
    #### node related checks
4898

    
4899
    # check primary node
4900
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
4901
    assert self.pnode is not None, \
4902
      "Cannot retrieve locked node %s" % self.op.pnode
4903
    if pnode.offline:
4904
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
4905
                                 pnode.name)
4906
    if pnode.drained:
4907
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
4908
                                 pnode.name)
4909

    
4910
    self.secondaries = []
4911

    
4912
    # mirror node verification
4913
    if self.op.disk_template in constants.DTS_NET_MIRROR:
4914
      if self.op.snode is None:
4915
        raise errors.OpPrereqError("The networked disk templates need"
4916
                                   " a mirror node")
4917
      if self.op.snode == pnode.name:
4918
        raise errors.OpPrereqError("The secondary node cannot be"
4919
                                   " the primary node.")
4920
      _CheckNodeOnline(self, self.op.snode)
4921
      _CheckNodeNotDrained(self, self.op.snode)
4922
      self.secondaries.append(self.op.snode)
4923

    
4924
    nodenames = [pnode.name] + self.secondaries
4925

    
4926
    req_size = _ComputeDiskSize(self.op.disk_template,
4927
                                self.disks)
4928

    
4929
    # Check lv size requirements
4930
    if req_size is not None:
4931
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
4932
                                         self.op.hypervisor)
4933
      for node in nodenames:
4934
        info = nodeinfo[node]
4935
        info.Raise("Cannot get current information from node %s" % node)
4936
        info = info.payload
4937
        vg_free = info.get('vg_free', None)
4938
        if not isinstance(vg_free, int):
4939
          raise errors.OpPrereqError("Can't compute free disk space on"
4940
                                     " node %s" % node)
4941
        if req_size > vg_free:
4942
          raise errors.OpPrereqError("Not enough disk space on target node %s."
4943
                                     " %d MB available, %d MB required" %
4944
                                     (node, vg_free, req_size))
4945

    
4946
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
4947

    
4948
    # os verification
4949
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
4950
    result.Raise("OS '%s' not in supported os list for primary node %s" %
4951
                 (self.op.os_type, pnode.name), prereq=True)
4952

    
4953
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
4954

    
4955
    # memory check on primary node
4956
    if self.op.start:
4957
      _CheckNodeFreeMemory(self, self.pnode.name,
4958
                           "creating instance %s" % self.op.instance_name,
4959
                           self.be_full[constants.BE_MEMORY],
4960
                           self.op.hypervisor)
4961

    
4962
    self.dry_run_result = list(nodenames)
4963

    
4964
  def Exec(self, feedback_fn):
4965
    """Create and add the instance to the cluster.
4966

4967
    """
4968
    instance = self.op.instance_name
4969
    pnode_name = self.pnode.name
4970

    
4971
    ht_kind = self.op.hypervisor
4972
    if ht_kind in constants.HTS_REQ_PORT:
4973
      network_port = self.cfg.AllocatePort()
4974
    else:
4975
      network_port = None
4976

    
4977
    ##if self.op.vnc_bind_address is None:
4978
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
4979

    
4980
    # this is needed because os.path.join does not accept None arguments
4981
    if self.op.file_storage_dir is None:
4982
      string_file_storage_dir = ""
4983
    else:
4984
      string_file_storage_dir = self.op.file_storage_dir
4985

    
4986
    # build the full file storage dir path
4987
    file_storage_dir = os.path.normpath(os.path.join(
4988
                                        self.cfg.GetFileStorageDir(),
4989
                                        string_file_storage_dir, instance))
4990

    
4991

    
4992
    disks = _GenerateDiskTemplate(self,
4993
                                  self.op.disk_template,
4994
                                  instance, pnode_name,
4995
                                  self.secondaries,
4996
                                  self.disks,
4997
                                  file_storage_dir,
4998
                                  self.op.file_driver,
4999
                                  0)
5000

    
5001
    iobj = objects.Instance(name=instance, os=self.op.os_type,
5002
                            primary_node=pnode_name,
5003
                            nics=self.nics, disks=disks,
5004
                            disk_template=self.op.disk_template,
5005
                            admin_up=False,
5006
                            network_port=network_port,
5007
                            beparams=self.op.beparams,
5008
                            hvparams=self.op.hvparams,
5009
                            hypervisor=self.op.hypervisor,
5010
                            )
5011

    
5012
    feedback_fn("* creating instance disks...")
5013
    try:
5014
      _CreateDisks(self, iobj)
5015
    except errors.OpExecError:
5016
      self.LogWarning("Device creation failed, reverting...")
5017
      try:
5018
        _RemoveDisks(self, iobj)
5019
      finally:
5020
        self.cfg.ReleaseDRBDMinors(instance)
5021
        raise
5022

    
5023
    feedback_fn("adding instance %s to cluster config" % instance)
5024

    
5025
    self.cfg.AddInstance(iobj)
5026
    # Declare that we don't want to remove the instance lock anymore, as we've
5027
    # added the instance to the config
5028
    del self.remove_locks[locking.LEVEL_INSTANCE]
5029
    # Unlock all the nodes
5030
    if self.op.mode == constants.INSTANCE_IMPORT:
5031
      nodes_keep = [self.op.src_node]
5032
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5033
                       if node != self.op.src_node]
5034
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
5035
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5036
    else:
5037
      self.context.glm.release(locking.LEVEL_NODE)
5038
      del self.acquired_locks[locking.LEVEL_NODE]
5039

    
5040
    if self.op.wait_for_sync:
5041
      disk_abort = not _WaitForSync(self, iobj)
5042
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
5043
      # make sure the disks are not degraded (still sync-ing is ok)
5044
      time.sleep(15)
5045
      feedback_fn("* checking mirrors status")
5046
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
5047
    else:
5048
      disk_abort = False
5049

    
5050
    if disk_abort:
5051
      _RemoveDisks(self, iobj)
5052
      self.cfg.RemoveInstance(iobj.name)
5053
      # Make sure the instance lock gets removed
5054
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5055
      raise errors.OpExecError("There are some degraded disks for"
5056
                               " this instance")
5057

    
5058
    feedback_fn("creating os for instance %s on node %s" %
5059
                (instance, pnode_name))
5060

    
5061
    if iobj.disk_template != constants.DT_DISKLESS:
5062
      if self.op.mode == constants.INSTANCE_CREATE:
5063
        feedback_fn("* running the instance OS create scripts...")
5064
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
5065
        result.Raise("Could not add os for instance %s"
5066
                     " on node %s" % (instance, pnode_name))
5067

    
5068
      elif self.op.mode == constants.INSTANCE_IMPORT:
5069
        feedback_fn("* running the instance OS import scripts...")
5070
        src_node = self.op.src_node
5071
        src_images = self.src_images
5072
        cluster_name = self.cfg.GetClusterName()
5073
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5074
                                                         src_node, src_images,
5075
                                                         cluster_name)
5076
        msg = import_result.fail_msg
5077
        if msg:
5078
          self.LogWarning("Error while importing the disk images for instance"
5079
                          " %s on node %s: %s" % (instance, pnode_name, msg))
5080
      else:
5081
        # also checked in the prereq part
5082
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5083
                                     % self.op.mode)
5084

    
5085
    if self.op.start:
5086
      iobj.admin_up = True
5087
      self.cfg.Update(iobj)
5088
      logging.info("Starting instance %s on node %s", instance, pnode_name)
5089
      feedback_fn("* starting instance...")
5090
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5091
      result.Raise("Could not start instance")
5092

    
5093
    return list(iobj.all_nodes)
5094

    
5095

    
5096
class LUConnectConsole(NoHooksLU):
5097
  """Connect to an instance's console.
5098

5099
  This is somewhat special in that it returns the command line that
5100
  you need to run on the master node in order to connect to the
5101
  console.
5102

5103
  """
5104
  _OP_REQP = ["instance_name"]
5105
  REQ_BGL = False
5106

    
5107
  def ExpandNames(self):
5108
    self._ExpandAndLockInstance()
5109

    
5110
  def CheckPrereq(self):
5111
    """Check prerequisites.
5112

5113
    This checks that the instance is in the cluster.
5114

5115
    """
5116
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5117
    assert self.instance is not None, \
5118
      "Cannot retrieve locked instance %s" % self.op.instance_name
5119
    _CheckNodeOnline(self, self.instance.primary_node)
5120

    
5121
  def Exec(self, feedback_fn):
5122
    """Connect to the console of an instance
5123

5124
    """
5125
    instance = self.instance
5126
    node = instance.primary_node
5127

    
5128
    node_insts = self.rpc.call_instance_list([node],
5129
                                             [instance.hypervisor])[node]
5130
    node_insts.Raise("Can't get node information from %s" % node)
5131

    
5132
    if instance.name not in node_insts.payload:
5133
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5134

    
5135
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5136

    
5137
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5138
    cluster = self.cfg.GetClusterInfo()
5139
    # beparams and hvparams are passed separately, to avoid editing the
5140
    # instance and then saving the defaults in the instance itself.
5141
    hvparams = cluster.FillHV(instance)
5142
    beparams = cluster.FillBE(instance)
5143
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5144

    
5145
    # build ssh cmdline
5146
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
5147

    
5148

    
5149
class LUReplaceDisks(LogicalUnit):
5150
  """Replace the disks of an instance.
5151

5152
  """
5153
  HPATH = "mirrors-replace"
5154
  HTYPE = constants.HTYPE_INSTANCE
5155
  _OP_REQP = ["instance_name", "mode", "disks"]
5156
  REQ_BGL = False
5157

    
5158
  def CheckArguments(self):
5159
    if not hasattr(self.op, "remote_node"):
5160
      self.op.remote_node = None
5161
    if not hasattr(self.op, "iallocator"):
5162
      self.op.iallocator = None
5163

    
5164
    _DiskReplacer.CheckArguments(self.op.mode, self.op.remote_node,
5165
                                 self.op.iallocator)
5166

    
5167
  def ExpandNames(self):
5168
    self._ExpandAndLockInstance()
5169

    
5170
    if self.op.iallocator is not None:
5171
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5172

    
5173
    elif self.op.remote_node is not None:
5174
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5175
      if remote_node is None:
5176
        raise errors.OpPrereqError("Node '%s' not known" %
5177
                                   self.op.remote_node)
5178

    
5179
      self.op.remote_node = remote_node
5180

    
5181
      # Warning: do not remove the locking of the new secondary here
5182
      # unless DRBD8.AddChildren is changed to work in parallel;
5183
      # currently it doesn't since parallel invocations of
5184
      # FindUnusedMinor will conflict
5185
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5186
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5187

    
5188
    else:
5189
      self.needed_locks[locking.LEVEL_NODE] = []
5190
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5191

    
5192
    self.replacer = _DiskReplacer(self, self.op.instance_name, self.op.mode,
5193
                                  self.op.iallocator, self.op.remote_node,
5194
                                  self.op.disks)
5195

    
5196
  def DeclareLocks(self, level):
5197
    # If we're not already locking all nodes in the set we have to declare the
5198
    # instance's primary/secondary nodes.
5199
    if (level == locking.LEVEL_NODE and
5200
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5201
      self._LockInstancesNodes()
5202

    
5203
  def BuildHooksEnv(self):
5204
    """Build hooks env.
5205

5206
    This runs on the master, the primary and all the secondaries.
5207

5208
    """
5209
    instance = self.replacer.instance
5210
    env = {
5211
      "MODE": self.op.mode,
5212
      "NEW_SECONDARY": self.op.remote_node,
5213
      "OLD_SECONDARY": instance.secondary_nodes[0],
5214
      }
5215
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5216
    nl = [
5217
      self.cfg.GetMasterNode(),
5218
      instance.primary_node,
5219
      ]
5220
    if self.op.remote_node is not None:
5221
      nl.append(self.op.remote_node)
5222
    return env, nl, nl
5223

    
5224
  def CheckPrereq(self):
5225
    """Check prerequisites.
5226

5227
    This checks that the instance is in the cluster.
5228

5229
    """
5230
    self.replacer.CheckPrereq()
5231

    
5232
  def Exec(self, feedback_fn):
5233
    """Execute disk replacement.
5234

5235
    This dispatches the disk replacement to the appropriate handler.
5236

5237
    """
5238
    self.replacer.Exec()
5239

    
5240

    
5241
class _DiskReplacer:
5242
  """Replaces disks for an instance.
5243

5244
  Note: Locking is not within the scope of this class.
5245

5246
  """
5247
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
5248
               disks):
5249
    """Initializes this class.
5250

5251
    """
5252
    # Parameters
5253
    self.lu = lu
5254
    self.instance_name = instance_name
5255
    self.mode = mode
5256
    self.iallocator_name = iallocator_name
5257
    self.remote_node = remote_node
5258
    self.disks = disks
5259

    
5260
    # Shortcuts
5261
    self.cfg = lu.cfg
5262
    self.rpc = lu.rpc
5263

    
5264
    # Runtime data
5265
    self.instance = None
5266
    self.new_node = None
5267
    self.target_node = None
5268
    self.other_node = None
5269
    self.remote_node_info = None
5270
    self.node_secondary_ip = None
5271

    
5272
  @staticmethod
5273
  def CheckArguments(mode, remote_node, iallocator):
5274
    # check for valid parameter combination
5275
    cnt = [remote_node, iallocator].count(None)
5276
    if mode == constants.REPLACE_DISK_CHG:
5277
      if cnt == 2:
5278
        raise errors.OpPrereqError("When changing the secondary either an"
5279
                                   " iallocator script must be used or the"
5280
                                   " new node given")
5281
      elif cnt == 0:
5282
        raise errors.OpPrereqError("Give either the iallocator or the new"
5283
                                   " secondary, not both")
5284
    else: # not replacing the secondary
5285
      if cnt != 2:
5286
        raise errors.OpPrereqError("The iallocator and new node options can"
5287
                                   " be used only when changing the"
5288
                                   " secondary node")
5289

    
5290
  @staticmethod
5291
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
5292
    """Compute a new secondary node using an IAllocator.
5293

5294
    """
5295
    ial = IAllocator(lu.cfg, lu.rpc,
5296
                     mode=constants.IALLOCATOR_MODE_RELOC,
5297
                     name=instance_name,
5298
                     relocate_from=relocate_from)
5299

    
5300
    ial.Run(iallocator_name)
5301

    
5302
    if not ial.success:
5303
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
5304
                                 " %s" % (iallocator_name, ial.info))
5305

    
5306
    if len(ial.nodes) != ial.required_nodes:
5307
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5308
                                 " of nodes (%s), required %s" %
5309
                                 (len(ial.nodes), ial.required_nodes))
5310

    
5311
    remote_node_name = ial.nodes[0]
5312

    
5313
    lu.LogInfo("Selected new secondary for instance '%s': %s",
5314
               instance_name, remote_node_name)
5315

    
5316
    return remote_node_name
5317

    
5318
  def CheckPrereq(self):
5319
    """Check prerequisites.
5320

5321
    This checks that the instance is in the cluster.
5322

5323
    """
5324
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
5325
    assert self.instance is not None, \
5326
      "Cannot retrieve locked instance %s" % self.instance_name
5327

    
5328
    if self.instance.disk_template != constants.DT_DRBD8:
5329
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5330
                                 " instances")
5331

    
5332
    if len(self.instance.secondary_nodes) != 1:
5333
      raise errors.OpPrereqError("The instance has a strange layout,"
5334
                                 " expected one secondary but found %d" %
5335
                                 len(self.instance.secondary_nodes))
5336

    
5337
    secondary_node = self.instance.secondary_nodes[0]
5338

    
5339
    if self.iallocator_name is None:
5340
      remote_node = self.remote_node
5341
    else:
5342
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
5343
                                       self.instance.name, secondary_node)
5344

    
5345
    if remote_node is not None:
5346
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5347
      assert self.remote_node_info is not None, \
5348
        "Cannot retrieve locked node %s" % remote_node
5349
    else:
5350
      self.remote_node_info = None
5351

    
5352
    if remote_node == self.instance.primary_node:
5353
      raise errors.OpPrereqError("The specified node is the primary node of"
5354
                                 " the instance.")
5355

    
5356
    if remote_node == secondary_node:
5357
      raise errors.OpPrereqError("The specified node is already the"
5358
                                 " secondary node of the instance.")
5359

    
5360
    if self.mode == constants.REPLACE_DISK_PRI:
5361
      self.target_node = self.instance.primary_node
5362
      self.other_node = secondary_node
5363
      check_nodes = [self.target_node, self.other_node]
5364

    
5365
    elif self.mode == constants.REPLACE_DISK_SEC:
5366
      self.target_node = secondary_node
5367
      self.other_node = self.instance.primary_node
5368
      check_nodes = [self.target_node, self.other_node]
5369

    
5370
    elif self.mode == constants.REPLACE_DISK_CHG:
5371
      self.new_node = remote_node
5372
      self.other_node = self.instance.primary_node
5373
      self.target_node = secondary_node
5374
      check_nodes = [self.new_node, self.other_node]
5375

    
5376
      _CheckNodeNotDrained(self.lu, remote_node)
5377

    
5378
    else:
5379
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
5380
                                   self.mode)
5381

    
5382
    for node in check_nodes:
5383
      _CheckNodeOnline(self.lu, node)
5384

    
5385
    # If not specified all disks should be replaced
5386
    if not self.disks:
5387
      self.disks = range(len(self.instance.disks))
5388

    
5389
    # Check whether disks are valid
5390
    for disk_idx in self.disks:
5391
      self.instance.FindDisk(disk_idx)
5392

    
5393
    # Get secondary node IP addresses
5394
    node_2nd_ip = {}
5395

    
5396
    for node_name in [self.target_node, self.other_node, self.new_node]:
5397
      if node_name is not None:
5398
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
5399

    
5400
    self.node_secondary_ip = node_2nd_ip
5401

    
5402
  def Exec(self):
5403
    """Execute disk replacement.
5404

5405
    This dispatches the disk replacement to the appropriate handler.
5406

5407
    """
5408
    activate_disks = (not self.instance.admin_up)
5409

    
5410
    # Activate the instance disks if we're replacing them on a down instance
5411
    if activate_disks:
5412
      _StartInstanceDisks(self.lu, self.instance, True)
5413

    
5414
    try:
5415
      if self.mode == constants.REPLACE_DISK_CHG:
5416
        return self._ExecDrbd8Secondary()
5417
      else:
5418
        return self._ExecDrbd8DiskOnly()
5419

    
5420
    finally:
5421
      # Deactivate the instance disks if we're replacing them on a down instance
5422
      if activate_disks:
5423
        _SafeShutdownInstanceDisks(self.lu, self.instance)
5424

    
5425
  def _CheckVolumeGroup(self, nodes):
5426
    self.lu.LogInfo("Checking volume groups")
5427

    
5428
    vgname = self.cfg.GetVGName()
5429

    
5430
    # Make sure volume group exists on all involved nodes
5431
    results = self.rpc.call_vg_list(nodes)
5432
    if not results:
5433
      raise errors.OpExecError("Can't list volume groups on the nodes")
5434

    
5435
    for node in nodes:
5436
      res = results[node]
5437
      res.Raise("Error checking node %s" % node)
5438
      if vgname not in res.payload:
5439
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
5440
                                 (vgname, node))
5441

    
5442
  def _CheckDisksExistence(self, nodes):
5443
    # Check disk existence
5444
    for idx, dev in enumerate(self.instance.disks):
5445
      if idx not in self.disks:
5446
        continue
5447

    
5448
      for node in nodes:
5449
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
5450
        self.cfg.SetDiskID(dev, node)
5451

    
5452
        result = self.rpc.call_blockdev_find(node, dev)
5453

    
5454
        msg = result.fail_msg
5455
        if msg or not result.payload:
5456
          if not msg:
5457
            msg = "disk not found"
5458
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5459
                                   (idx, node, msg))
5460

    
5461
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
5462
    for idx, dev in enumerate(self.instance.disks):
5463
      if idx not in self.disks:
5464
        continue
5465

    
5466
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
5467
                      (idx, node_name))
5468

    
5469
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
5470
                                   ldisk=ldisk):
5471
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
5472
                                 " replace disks for instance %s" %
5473
                                 (node_name, self.instance.name))
5474

    
5475
  def _CreateNewStorage(self, node_name):
5476
    vgname = self.cfg.GetVGName()
5477
    iv_names = {}
5478

    
5479
    for idx, dev in enumerate(self.instance.disks):
5480
      if idx not in self.disks:
5481
        continue
5482

    
5483
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
5484

    
5485
      self.cfg.SetDiskID(dev, node_name)
5486

    
5487
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
5488
      names = _GenerateUniqueNames(self.lu, lv_names)
5489

    
5490
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
5491
                             logical_id=(vgname, names[0]))
5492
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5493
                             logical_id=(vgname, names[1]))
5494

    
5495
      new_lvs = [lv_data, lv_meta]
5496
      old_lvs = dev.children
5497
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5498

    
5499
      # we pass force_create=True to force the LVM creation
5500
      for new_lv in new_lvs:
5501
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
5502
                        _GetInstanceInfoText(self.instance), False)
5503

    
5504
    return iv_names
5505

    
5506
  def _CheckDevices(self, node_name, iv_names):
5507
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5508
      self.cfg.SetDiskID(dev, node_name)
5509

    
5510
      result = self.rpc.call_blockdev_find(node_name, dev)
5511

    
5512
      msg = result.fail_msg
5513
      if msg or not result.payload:
5514
        if not msg:
5515
          msg = "disk not found"
5516
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5517
                                 (name, msg))
5518

    
5519
      if result.payload[5]:
5520
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5521

    
5522
  def _RemoveOldStorage(self, node_name, iv_names):
5523
    for name, (dev, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)

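  # Illustrative note (not part of the original code path): with a
  # hypothetical old LV whose physical_id is ("xenvg", "disk0_data") and a
  # temp_suffix of 1234567890, the two rename passes above produce
  #   old LV: ("xenvg", "disk0_data")
  #             -> ("xenvg", "disk0_data_replaced-1234567890")
  #   new LV: (its unique creation name) -> ("xenvg", "disk0_data")
  # so the drbd device is re-attached to LVs that carry the original names,
  # while the replaced LVs keep the "_replaced-<time_t>" suffix until step 6
  # removes them.
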
  def _ExecDrbd8Secondary(self):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r" % (minors,))

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net(
               [self.instance.primary_node], self.node_secondary_ip,
               self.instance.disks)[self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)


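# Illustrative sketch (hypothetical helper, not referenced by the LUs above):
# it restates, on plain tuples, how _ExecDrbd8Secondary derives the two new
# drbd logical_ids for every disk -- one without the port for the standalone
# activation on the new node, one with the port for the later network attach.
def _ExampleNewDrbdLogicalIds(old_logical_id, primary_node, new_node,
                              new_minor):
  """Return (alone_id, net_id) for a secondary replacement (sketch only)."""
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_logical_id
  # o_node2 is the old secondary, which is being dropped; keep the minor
  # that belongs to the primary node
  if primary_node == o_node1:
    p_minor = o_minor1
  else:
    p_minor = o_minor2
  alone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
  net_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
  return alone_id, net_id

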
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node)
      vg_free = info.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node, vg_free, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


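# Illustrative sketch (hypothetical helper, not referenced above): the free
# space prerequisite of LUGrowDisk, expressed on plain data.  Node names and
# vg_free values (MiB) are made up for the example.
def _ExampleCheckGrowSpace(amount, vg_free_by_node):
  """Return the nodes that cannot fit a grow of 'amount' MiB (sketch only)."""
  short_nodes = []
  for node, vg_free in vg_free_by_node.items():
    # mirror the LU: a missing or garbled vg_free value also fails the check
    if not isinstance(vg_free, int) or amount > vg_free:
      short_nodes.append(node)
  return short_nodes

# e.g. _ExampleCheckGrowSpace(2048, {"node1": 4096, "node2": 1024}) returns
# ["node2"]; LUGrowDisk turns any such node into an OpPrereqError.

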
class LUQueryInstanceData(NoHooksLU):
5879
  """Query runtime instance data.
5880

5881
  """
5882
  _OP_REQP = ["instances", "static"]
5883
  REQ_BGL = False
5884

    
5885
  def ExpandNames(self):
5886
    self.needed_locks = {}
5887
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
5888

    
5889
    if not isinstance(self.op.instances, list):
5890
      raise errors.OpPrereqError("Invalid argument type 'instances'")
5891

    
5892
    if self.op.instances:
5893
      self.wanted_names = []
5894
      for name in self.op.instances:
5895
        full_name = self.cfg.ExpandInstanceName(name)
5896
        if full_name is None:
5897
          raise errors.OpPrereqError("Instance '%s' not known" % name)
5898
        self.wanted_names.append(full_name)
5899
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
5900
    else:
5901
      self.wanted_names = None
5902
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5903

    
5904
    self.needed_locks[locking.LEVEL_NODE] = []
5905
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5906

    
5907
  def DeclareLocks(self, level):
5908
    if level == locking.LEVEL_NODE:
5909
      self._LockInstancesNodes()
5910

    
5911
  def CheckPrereq(self):
5912
    """Check prerequisites.
5913

5914
    This only checks the optional instance list against the existing names.
5915

5916
    """
5917
    if self.wanted_names is None:
5918
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5919

    
5920
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
5921
                             in self.wanted_names]
5922
    return
5923

    
5924
  def _ComputeDiskStatus(self, instance, snode, dev):
5925
    """Compute block device status.
5926

5927
    """
5928
    static = self.op.static
5929
    if not static:
5930
      self.cfg.SetDiskID(dev, instance.primary_node)
5931
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
5932
      if dev_pstatus.offline:
5933
        dev_pstatus = None
5934
      else:
5935
        dev_pstatus.Raise("Can't compute disk status for %s" % instance.name)
5936
        dev_pstatus = dev_pstatus.payload
5937
    else:
5938
      dev_pstatus = None
5939

    
5940
    if dev.dev_type in constants.LDS_DRBD:
5941
      # we change the snode then (otherwise we use the one passed in)
5942
      if dev.logical_id[0] == instance.primary_node:
5943
        snode = dev.logical_id[1]
5944
      else:
5945
        snode = dev.logical_id[0]
5946

    
5947
    if snode and not static:
5948
      self.cfg.SetDiskID(dev, snode)
5949
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
5950
      if dev_sstatus.offline:
5951
        dev_sstatus = None
5952
      else:
5953
        dev_sstatus.Raise("Can't compute disk status for %s" % instance.name)
5954
        dev_sstatus = dev_sstatus.payload
5955
    else:
5956
      dev_sstatus = None
5957

    
5958
    if dev.children:
5959
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
5960
                      for child in dev.children]
5961
    else:
5962
      dev_children = []
5963

    
5964
    data = {
5965
      "iv_name": dev.iv_name,
5966
      "dev_type": dev.dev_type,
5967
      "logical_id": dev.logical_id,
5968
      "physical_id": dev.physical_id,
5969
      "pstatus": dev_pstatus,
5970
      "sstatus": dev_sstatus,
5971
      "children": dev_children,
5972
      "mode": dev.mode,
5973
      "size": dev.size,
5974
      }
5975

    
5976
    return data
5977

    
5978
  def Exec(self, feedback_fn):
5979
    """Gather and return data"""
5980
    result = {}
5981

    
5982
    cluster = self.cfg.GetClusterInfo()
5983

    
5984
    for instance in self.wanted_instances:
5985
      if not self.op.static:
5986
        remote_info = self.rpc.call_instance_info(instance.primary_node,
5987
                                                  instance.name,
5988
                                                  instance.hypervisor)
5989
        remote_info.Raise("Error checking node %s" % instance.primary_node)
5990
        remote_info = remote_info.payload
5991
        if remote_info and "state" in remote_info:
5992
          remote_state = "up"
5993
        else:
5994
          remote_state = "down"
5995
      else:
5996
        remote_state = None
5997
      if instance.admin_up:
5998
        config_state = "up"
5999
      else:
6000
        config_state = "down"
6001

    
6002
      disks = [self._ComputeDiskStatus(instance, None, device)
6003
               for device in instance.disks]
6004

    
6005
      idict = {
6006
        "name": instance.name,
6007
        "config_state": config_state,
6008
        "run_state": remote_state,
6009
        "pnode": instance.primary_node,
6010
        "snodes": instance.secondary_nodes,
6011
        "os": instance.os,
6012
        # this happens to be the same format used for hooks
6013
        "nics": _NICListToTuple(self, instance.nics),
6014
        "disks": disks,
6015
        "hypervisor": instance.hypervisor,
6016
        "network_port": instance.network_port,
6017
        "hv_instance": instance.hvparams,
6018
        "hv_actual": cluster.FillHV(instance),
6019
        "be_instance": instance.beparams,
6020
        "be_actual": cluster.FillBE(instance),
6021
        }
6022

    
6023
      result[instance.name] = idict
6024

    
6025
    return result
6026

    
6027

    
6028
class LUSetInstanceParams(LogicalUnit):
6029
  """Modifies an instances's parameters.
6030

6031
  """
6032
  HPATH = "instance-modify"
6033
  HTYPE = constants.HTYPE_INSTANCE
6034
  _OP_REQP = ["instance_name"]
6035
  REQ_BGL = False
6036

    
6037
  def CheckArguments(self):
6038
    if not hasattr(self.op, 'nics'):
6039
      self.op.nics = []
6040
    if not hasattr(self.op, 'disks'):
6041
      self.op.disks = []
6042
    if not hasattr(self.op, 'beparams'):
6043
      self.op.beparams = {}
6044
    if not hasattr(self.op, 'hvparams'):
6045
      self.op.hvparams = {}
6046
    self.op.force = getattr(self.op, "force", False)
6047
    if not (self.op.nics or self.op.disks or
6048
            self.op.hvparams or self.op.beparams):
6049
      raise errors.OpPrereqError("No changes submitted")
6050

    
6051
    # Disk validation
6052
    disk_addremove = 0
6053
    for disk_op, disk_dict in self.op.disks:
6054
      if disk_op == constants.DDM_REMOVE:
6055
        disk_addremove += 1
6056
        continue
6057
      elif disk_op == constants.DDM_ADD:
6058
        disk_addremove += 1
6059
      else:
6060
        if not isinstance(disk_op, int):
6061
          raise errors.OpPrereqError("Invalid disk index")
6062
        if not isinstance(disk_dict, dict):
6063
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
6064
          raise errors.OpPrereqError(msg)
6065

    
6066
      if disk_op == constants.DDM_ADD:
6067
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
6068
        if mode not in constants.DISK_ACCESS_SET:
6069
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
6070
        size = disk_dict.get('size', None)
6071
        if size is None:
6072
          raise errors.OpPrereqError("Required disk parameter size missing")
6073
        try:
6074
          size = int(size)
6075
        except ValueError, err:
6076
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
6077
                                     str(err))
6078
        disk_dict['size'] = size
6079
      else:
6080
        # modification of disk
6081
        if 'size' in disk_dict:
6082
          raise errors.OpPrereqError("Disk size change not possible, use"
6083
                                     " grow-disk")
6084

    
6085
    if disk_addremove > 1:
6086
      raise errors.OpPrereqError("Only one disk add or remove operation"
6087
                                 " supported at a time")
6088

    
6089
    # NIC validation
6090
    nic_addremove = 0
6091
    for nic_op, nic_dict in self.op.nics:
6092
      if nic_op == constants.DDM_REMOVE:
6093
        nic_addremove += 1
6094
        continue
6095
      elif nic_op == constants.DDM_ADD:
6096
        nic_addremove += 1
6097
      else:
6098
        if not isinstance(nic_op, int):
6099
          raise errors.OpPrereqError("Invalid nic index")
6100
        if not isinstance(nic_dict, dict):
6101
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
6102
          raise errors.OpPrereqError(msg)
6103

    
6104
      # nic_dict should be a dict
6105
      nic_ip = nic_dict.get('ip', None)
6106
      if nic_ip is not None:
6107
        if nic_ip.lower() == constants.VALUE_NONE:
6108
          nic_dict['ip'] = None
6109
        else:
6110
          if not utils.IsValidIP(nic_ip):
6111
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
6112

    
6113
      nic_bridge = nic_dict.get('bridge', None)
6114
      nic_link = nic_dict.get('link', None)
6115
      if nic_bridge and nic_link:
6116
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6117
                                   " at the same time")
6118
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
6119
        nic_dict['bridge'] = None
6120
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
6121
        nic_dict['link'] = None
6122

    
6123
      if nic_op == constants.DDM_ADD:
6124
        nic_mac = nic_dict.get('mac', None)
6125
        if nic_mac is None:
6126
          nic_dict['mac'] = constants.VALUE_AUTO
6127

    
6128
      if 'mac' in nic_dict:
6129
        nic_mac = nic_dict['mac']
6130
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6131
          if not utils.IsValidMac(nic_mac):
6132
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
6133
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
6134
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
6135
                                     " modifying an existing nic")
6136

    
6137
    if nic_addremove > 1:
6138
      raise errors.OpPrereqError("Only one NIC add or remove operation"
6139
                                 " supported at a time")
6140

    
6141
  def ExpandNames(self):
6142
    self._ExpandAndLockInstance()
6143
    self.needed_locks[locking.LEVEL_NODE] = []
6144
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6145

    
6146
  def DeclareLocks(self, level):
6147
    if level == locking.LEVEL_NODE:
6148
      self._LockInstancesNodes()
6149

    
6150
  def BuildHooksEnv(self):
6151
    """Build hooks env.
6152

6153
    This runs on the master, primary and secondaries.
6154

6155
    """
6156
    args = dict()
6157
    if constants.BE_MEMORY in self.be_new:
6158
      args['memory'] = self.be_new[constants.BE_MEMORY]
6159
    if constants.BE_VCPUS in self.be_new:
6160
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
6161
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
6162
    # information at all.
6163
    if self.op.nics:
6164
      args['nics'] = []
6165
      nic_override = dict(self.op.nics)
6166
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
6167
      for idx, nic in enumerate(self.instance.nics):
6168
        if idx in nic_override:
6169
          this_nic_override = nic_override[idx]
6170
        else:
6171
          this_nic_override = {}
6172
        if 'ip' in this_nic_override:
6173
          ip = this_nic_override['ip']
6174
        else:
6175
          ip = nic.ip
6176
        if 'mac' in this_nic_override:
6177
          mac = this_nic_override['mac']
6178
        else:
6179
          mac = nic.mac
6180
        if idx in self.nic_pnew:
6181
          nicparams = self.nic_pnew[idx]
6182
        else:
6183
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6184
        mode = nicparams[constants.NIC_MODE]
6185
        link = nicparams[constants.NIC_LINK]
6186
        args['nics'].append((ip, mac, mode, link))
6187
      if constants.DDM_ADD in nic_override:
6188
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6189
        mac = nic_override[constants.DDM_ADD]['mac']
6190
        nicparams = self.nic_pnew[constants.DDM_ADD]
6191
        mode = nicparams[constants.NIC_MODE]
6192
        link = nicparams[constants.NIC_LINK]
6193
        args['nics'].append((ip, mac, mode, link))
6194
      elif constants.DDM_REMOVE in nic_override:
6195
        del args['nics'][-1]
6196

    
6197
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6198
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6199
    return env, nl, nl
6200

    
6201
  def _GetUpdatedParams(self, old_params, update_dict,
6202
                        default_values, parameter_types):
6203
    """Return the new params dict for the given params.
6204

6205
    @type old_params: dict
6206
    @param old_params: old parameters
6207
    @type update_dict: dict
6208
    @param update_dict: dict containing new parameter values,
6209
                        or constants.VALUE_DEFAULT to reset the
6210
                        parameter to its default value
6211
    @type default_values: dict
6212
    @param default_values: default values for the filled parameters
6213
    @type parameter_types: dict
6214
    @param parameter_types: dict mapping target dict keys to types
6215
                            in constants.ENFORCEABLE_TYPES
6216
    @rtype: (dict, dict)
6217
    @return: (new_parameters, filled_parameters)
6218

6219
    """
6220
    params_copy = copy.deepcopy(old_params)
6221
    for key, val in update_dict.iteritems():
6222
      if val == constants.VALUE_DEFAULT:
6223
        try:
6224
          del params_copy[key]
6225
        except KeyError:
6226
          pass
6227
      else:
6228
        params_copy[key] = val
6229
    utils.ForceDictType(params_copy, parameter_types)
6230
    params_filled = objects.FillDict(default_values, params_copy)
6231
    return (params_copy, params_filled)
6232

    
6233
  def CheckPrereq(self):
6234
    """Check prerequisites.
6235

6236
    This only checks the instance list against the existing names.
6237

6238
    """
6239
    self.force = self.op.force
6240

    
6241
    # checking the new params on the primary/secondary nodes
6242

    
6243
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6244
    cluster = self.cluster = self.cfg.GetClusterInfo()
6245
    assert self.instance is not None, \
6246
      "Cannot retrieve locked instance %s" % self.op.instance_name
6247
    pnode = instance.primary_node
6248
    nodelist = list(instance.all_nodes)
6249

    
6250
    # hvparams processing
6251
    if self.op.hvparams:
6252
      i_hvdict, hv_new = self._GetUpdatedParams(
6253
                             instance.hvparams, self.op.hvparams,
6254
                             cluster.hvparams[instance.hypervisor],
6255
                             constants.HVS_PARAMETER_TYPES)
6256
      # local check
6257
      hypervisor.GetHypervisor(
6258
        instance.hypervisor).CheckParameterSyntax(hv_new)
6259
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6260
      self.hv_new = hv_new # the new actual values
6261
      self.hv_inst = i_hvdict # the new dict (without defaults)
6262
    else:
6263
      self.hv_new = self.hv_inst = {}
6264

    
6265
    # beparams processing
6266
    if self.op.beparams:
6267
      i_bedict, be_new = self._GetUpdatedParams(
6268
                             instance.beparams, self.op.beparams,
6269
                             cluster.beparams[constants.PP_DEFAULT],
6270
                             constants.BES_PARAMETER_TYPES)
6271
      self.be_new = be_new # the new actual values
6272
      self.be_inst = i_bedict # the new dict (without defaults)
6273
    else:
6274
      self.be_new = self.be_inst = {}
6275

    
6276
    self.warn = []
6277

    
6278
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6279
      mem_check_list = [pnode]
6280
      if be_new[constants.BE_AUTO_BALANCE]:
6281
        # either we changed auto_balance to yes or it was from before
6282
        mem_check_list.extend(instance.secondary_nodes)
6283
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6284
                                                  instance.hypervisor)
6285
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6286
                                         instance.hypervisor)
6287
      pninfo = nodeinfo[pnode]
6288
      msg = pninfo.fail_msg
6289
      if msg:
6290
        # Assume the primary node is unreachable and go ahead
6291
        self.warn.append("Can't get info from primary node %s: %s" %
6292
                         (pnode,  msg))
6293
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
6294
        self.warn.append("Node data from primary node %s doesn't contain"
6295
                         " free memory information" % pnode)
6296
      elif instance_info.fail_msg:
6297
        self.warn.append("Can't get instance runtime information: %s" %
6298
                        instance_info.fail_msg)
6299
      else:
6300
        if instance_info.payload:
6301
          current_mem = int(instance_info.payload['memory'])
6302
        else:
6303
          # Assume instance not running
6304
          # (there is a slight race condition here, but it's not very probable,
6305
          # and we have no other way to check)
6306
          current_mem = 0
6307
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6308
                    pninfo.payload['memory_free'])
6309
        if miss_mem > 0:
6310
          raise errors.OpPrereqError("This change will prevent the instance"
6311
                                     " from starting, due to %d MB of memory"
6312
                                     " missing on its primary node" % miss_mem)
6313

    
6314
      if be_new[constants.BE_AUTO_BALANCE]:
6315
        for node, nres in nodeinfo.items():
6316
          if node not in instance.secondary_nodes:
6317
            continue
6318
          msg = nres.fail_msg
6319
          if msg:
6320
            self.warn.append("Can't get info from secondary node %s: %s" %
6321
                             (node, msg))
6322
          elif not isinstance(nres.payload.get('memory_free', None), int):
6323
            self.warn.append("Secondary node %s didn't return free"
6324
                             " memory information" % node)
6325
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
6326
            self.warn.append("Not enough memory to failover instance to"
6327
                             " secondary node %s" % node)
6328

    
6329
    # NIC processing
6330
    self.nic_pnew = {}
6331
    self.nic_pinst = {}
6332
    for nic_op, nic_dict in self.op.nics:
6333
      if nic_op == constants.DDM_REMOVE:
6334
        if not instance.nics:
6335
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6336
        continue
6337
      if nic_op != constants.DDM_ADD:
6338
        # an existing nic
6339
        if nic_op < 0 or nic_op >= len(instance.nics):
6340
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6341
                                     " are 0 to %d" %
6342
                                     (nic_op, len(instance.nics)))
6343
        old_nic_params = instance.nics[nic_op].nicparams
6344
        old_nic_ip = instance.nics[nic_op].ip
6345
      else:
6346
        old_nic_params = {}
6347
        old_nic_ip = None
6348

    
6349
      update_params_dict = dict([(key, nic_dict[key])
6350
                                 for key in constants.NICS_PARAMETERS
6351
                                 if key in nic_dict])
6352

    
6353
      if 'bridge' in nic_dict:
6354
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6355

    
6356
      new_nic_params, new_filled_nic_params = \
6357
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6358
                                 cluster.nicparams[constants.PP_DEFAULT],
6359
                                 constants.NICS_PARAMETER_TYPES)
6360
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6361
      self.nic_pinst[nic_op] = new_nic_params
6362
      self.nic_pnew[nic_op] = new_filled_nic_params
6363
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6364

    
6365
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6366
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6367
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
6368
        if msg:
6369
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
6370
          if self.force:
6371
            self.warn.append(msg)
6372
          else:
6373
            raise errors.OpPrereqError(msg)
6374
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6375
        if 'ip' in nic_dict:
6376
          nic_ip = nic_dict['ip']
6377
        else:
6378
          nic_ip = old_nic_ip
6379
        if nic_ip is None:
6380
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6381
                                     ' on a routed nic')
6382
      if 'mac' in nic_dict:
6383
        nic_mac = nic_dict['mac']
6384
        if nic_mac is None:
6385
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6386
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6387
          # otherwise generate the mac
6388
          nic_dict['mac'] = self.cfg.GenerateMAC()
6389
        else:
6390
          # or validate/reserve the current one
6391
          if self.cfg.IsMacInUse(nic_mac):
6392
            raise errors.OpPrereqError("MAC address %s already in use"
6393
                                       " in cluster" % nic_mac)
6394

    
6395
    # DISK processing
6396
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6397
      raise errors.OpPrereqError("Disk operations not supported for"
6398
                                 " diskless instances")
6399
    for disk_op, disk_dict in self.op.disks:
6400
      if disk_op == constants.DDM_REMOVE:
6401
        if len(instance.disks) == 1:
6402
          raise errors.OpPrereqError("Cannot remove the last disk of"
6403
                                     " an instance")
6404
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6405
        ins_l = ins_l[pnode]
6406
        msg = ins_l.fail_msg
6407
        if msg:
6408
          raise errors.OpPrereqError("Can't contact node %s: %s" %
6409
                                     (pnode, msg))
6410
        if instance.name in ins_l.payload:
6411
          raise errors.OpPrereqError("Instance is running, can't remove"
6412
                                     " disks.")
6413

    
6414
      if (disk_op == constants.DDM_ADD and
6415
          len(instance.disks) >= constants.MAX_DISKS):
6416
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6417
                                   " add more" % constants.MAX_DISKS)
6418
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6419
        # an existing disk
6420
        if disk_op < 0 or disk_op >= len(instance.disks):
6421
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6422
                                     " are 0 to %d" %
6423
                                     (disk_op, len(instance.disks)))
6424

    
6425
    return
6426

    
6427
  def Exec(self, feedback_fn):
6428
    """Modifies an instance.
6429

6430
    All parameters take effect only at the next restart of the instance.
6431

6432
    """
6433
    # Process here the warnings from CheckPrereq, as we don't have a
6434
    # feedback_fn there.
6435
    for warn in self.warn:
6436
      feedback_fn("WARNING: %s" % warn)
6437

    
6438
    result = []
6439
    instance = self.instance
6440
    cluster = self.cluster
6441
    # disk changes
6442
    for disk_op, disk_dict in self.op.disks:
6443
      if disk_op == constants.DDM_REMOVE:
6444
        # remove the last disk
6445
        device = instance.disks.pop()
6446
        device_idx = len(instance.disks)
6447
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6448
          self.cfg.SetDiskID(disk, node)
6449
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
6450
          if msg:
6451
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6452
                            " continuing anyway", device_idx, node, msg)
6453
        result.append(("disk/%d" % device_idx, "remove"))
6454
      elif disk_op == constants.DDM_ADD:
6455
        # add a new disk
6456
        if instance.disk_template == constants.DT_FILE:
6457
          file_driver, file_path = instance.disks[0].logical_id
6458
          file_path = os.path.dirname(file_path)
6459
        else:
6460
          file_driver = file_path = None
6461
        disk_idx_base = len(instance.disks)
6462
        new_disk = _GenerateDiskTemplate(self,
6463
                                         instance.disk_template,
6464
                                         instance.name, instance.primary_node,
6465
                                         instance.secondary_nodes,
6466
                                         [disk_dict],
6467
                                         file_path,
6468
                                         file_driver,
6469
                                         disk_idx_base)[0]
6470
        instance.disks.append(new_disk)
6471
        info = _GetInstanceInfoText(instance)
6472

    
6473
        logging.info("Creating volume %s for instance %s",
6474
                     new_disk.iv_name, instance.name)
6475
        # Note: this needs to be kept in sync with _CreateDisks
6476
        #HARDCODE
6477
        for node in instance.all_nodes:
6478
          f_create = node == instance.primary_node
6479
          try:
6480
            _CreateBlockDev(self, node, instance, new_disk,
6481
                            f_create, info, f_create)
6482
          except errors.OpExecError, err:
6483
            self.LogWarning("Failed to create volume %s (%s) on"
6484
                            " node %s: %s",
6485
                            new_disk.iv_name, new_disk, node, err)
6486
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6487
                       (new_disk.size, new_disk.mode)))
6488
      else:
6489
        # change a given disk
6490
        instance.disks[disk_op].mode = disk_dict['mode']
6491
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6492
    # NIC changes
6493
    for nic_op, nic_dict in self.op.nics:
6494
      if nic_op == constants.DDM_REMOVE:
6495
        # remove the last nic
6496
        del instance.nics[-1]
6497
        result.append(("nic.%d" % len(instance.nics), "remove"))
6498
      elif nic_op == constants.DDM_ADD:
6499
        # mac and bridge should be set, by now
6500
        mac = nic_dict['mac']
6501
        ip = nic_dict.get('ip', None)
6502
        nicparams = self.nic_pinst[constants.DDM_ADD]
6503
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6504
        instance.nics.append(new_nic)
6505
        result.append(("nic.%d" % (len(instance.nics) - 1),
6506
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6507
                       (new_nic.mac, new_nic.ip,
6508
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6509
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6510
                       )))
6511
      else:
6512
        for key in 'mac', 'ip':
6513
          if key in nic_dict:
6514
            setattr(instance.nics[nic_op], key, nic_dict[key])
6515
        if nic_op in self.nic_pnew:
6516
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6517
        for key, val in nic_dict.iteritems():
6518
          result.append(("nic.%s/%d" % (key, nic_op), val))
6519

    
6520
    # hvparams changes
6521
    if self.op.hvparams:
6522
      instance.hvparams = self.hv_inst
6523
      for key, val in self.op.hvparams.iteritems():
6524
        result.append(("hv/%s" % key, val))
6525

    
6526
    # beparams changes
6527
    if self.op.beparams:
6528
      instance.beparams = self.be_inst
6529
      for key, val in self.op.beparams.iteritems():
6530
        result.append(("be/%s" % key, val))
6531

    
6532
    self.cfg.Update(instance)
6533

    
6534
    return result
6535

    
6536

    
6537
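# Illustrative sketch (hypothetical helper, not referenced above): the merge
# performed by LUSetInstanceParams._GetUpdatedParams, on plain dicts.  The
# string "default" stands in for constants.VALUE_DEFAULT.
def _ExampleUpdatedParams(old_params, update_dict, default_values,
                          reset_marker="default"):
  """Return (instance_level, filled) parameter dicts (sketch only)."""
  params = old_params.copy()
  for key, val in update_dict.items():
    if val == reset_marker:
      params.pop(key, None)    # drop the override, fall back to the default
    else:
      params[key] = val
  filled = default_values.copy()
  filled.update(params)        # same effect as objects.FillDict
  return params, filled

# e.g. _ExampleUpdatedParams({"memory": 512}, {"memory": "default",
#                            "vcpus": 2}, {"memory": 128, "vcpus": 1})
# yields ({"vcpus": 2}, {"memory": 128, "vcpus": 2}).

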
class LUQueryExports(NoHooksLU):
6538
  """Query the exports list
6539

6540
  """
6541
  _OP_REQP = ['nodes']
6542
  REQ_BGL = False
6543

    
6544
  def ExpandNames(self):
6545
    self.needed_locks = {}
6546
    self.share_locks[locking.LEVEL_NODE] = 1
6547
    if not self.op.nodes:
6548
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6549
    else:
6550
      self.needed_locks[locking.LEVEL_NODE] = \
6551
        _GetWantedNodes(self, self.op.nodes)
6552

    
6553
  def CheckPrereq(self):
6554
    """Check prerequisites.
6555

6556
    """
6557
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6558

    
6559
  def Exec(self, feedback_fn):
6560
    """Compute the list of all the exported system images.
6561

6562
    @rtype: dict
6563
    @return: a dictionary with the structure node->(export-list)
6564
        where export-list is a list of the instances exported on
6565
        that node.
6566

6567
    """
6568
    rpcresult = self.rpc.call_export_list(self.nodes)
6569
    result = {}
6570
    for node in rpcresult:
6571
      if rpcresult[node].fail_msg:
6572
        result[node] = False
6573
      else:
6574
        result[node] = rpcresult[node].payload
6575

    
6576
    return result
6577

    
6578

    
6579
class LUExportInstance(LogicalUnit):
6580
  """Export an instance to an image in the cluster.
6581

6582
  """
6583
  HPATH = "instance-export"
6584
  HTYPE = constants.HTYPE_INSTANCE
6585
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6586
  REQ_BGL = False
6587

    
6588
  def ExpandNames(self):
6589
    self._ExpandAndLockInstance()
6590
    # FIXME: lock only instance primary and destination node
6591
    #
6592
    # Sad but true, for now we have to lock all nodes, as we don't know where
6593
    # the previous export might be, and in this LU we search for it and
6594
    # remove it from its current node. In the future we could fix this by:
6595
    #  - making a tasklet to search (share-lock all), then create the new one,
6596
    #    then one to remove, after
6597
    #  - removing the removal operation altogether
6598
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6599

    
6600
  def DeclareLocks(self, level):
6601
    """Last minute lock declaration."""
6602
    # All nodes are locked anyway, so nothing to do here.
6603

    
6604
  def BuildHooksEnv(self):
6605
    """Build hooks env.
6606

6607
    This will run on the master, primary node and target node.
6608

6609
    """
6610
    env = {
6611
      "EXPORT_NODE": self.op.target_node,
6612
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6613
      }
6614
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6615
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6616
          self.op.target_node]
6617
    return env, nl, nl
6618

    
6619
  def CheckPrereq(self):
6620
    """Check prerequisites.
6621

6622
    This checks that the instance and node names are valid.
6623

6624
    """
6625
    instance_name = self.op.instance_name
6626
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6627
    assert self.instance is not None, \
6628
          "Cannot retrieve locked instance %s" % self.op.instance_name
6629
    _CheckNodeOnline(self, self.instance.primary_node)
6630

    
6631
    self.dst_node = self.cfg.GetNodeInfo(
6632
      self.cfg.ExpandNodeName(self.op.target_node))
6633

    
6634
    if self.dst_node is None:
6635
      # This is wrong node name, not a non-locked node
6636
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6637
    _CheckNodeOnline(self, self.dst_node.name)
6638
    _CheckNodeNotDrained(self, self.dst_node.name)
6639

    
6640
    # instance disk type verification
6641
    for disk in self.instance.disks:
6642
      if disk.dev_type == constants.LD_FILE:
6643
        raise errors.OpPrereqError("Export not supported for instances with"
6644
                                   " file-based disks")
6645

    
6646
  def Exec(self, feedback_fn):
6647
    """Export an instance to an image in the cluster.
6648

6649
    """
6650
    instance = self.instance
6651
    dst_node = self.dst_node
6652
    src_node = instance.primary_node
6653
    if self.op.shutdown:
6654
      # shutdown the instance, but not the disks
6655
      result = self.rpc.call_instance_shutdown(src_node, instance)
6656
      result.Raise("Could not shutdown instance %s on"
6657
                   " node %s" % (instance.name, src_node))
6658

    
6659
    vgname = self.cfg.GetVGName()
6660

    
6661
    snap_disks = []
6662

    
6663
    # set the disks ID correctly since call_instance_start needs the
6664
    # correct drbd minor to create the symlinks
6665
    for disk in instance.disks:
6666
      self.cfg.SetDiskID(disk, src_node)
6667

    
6668
    try:
6669
      for idx, disk in enumerate(instance.disks):
6670
        # result.payload will be a snapshot of an lvm leaf of the one we passed
6671
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
6672
        msg = result.fail_msg
6673
        if msg:
6674
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
6675
                          idx, src_node, msg)
6676
          snap_disks.append(False)
6677
        else:
6678
          disk_id = (vgname, result.payload)
6679
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6680
                                 logical_id=disk_id, physical_id=disk_id,
6681
                                 iv_name=disk.iv_name)
6682
          snap_disks.append(new_dev)
6683

    
6684
    finally:
6685
      if self.op.shutdown and instance.admin_up:
6686
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6687
        msg = result.fail_msg
6688
        if msg:
6689
          _ShutdownInstanceDisks(self, instance)
6690
          raise errors.OpExecError("Could not start instance: %s" % msg)
6691

    
6692
    # TODO: check for size
6693

    
6694
    cluster_name = self.cfg.GetClusterName()
6695
    for idx, dev in enumerate(snap_disks):
6696
      if dev:
6697
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6698
                                               instance, cluster_name, idx)
6699
        msg = result.fail_msg
6700
        if msg:
6701
          self.LogWarning("Could not export disk/%s from node %s to"
6702
                          " node %s: %s", idx, src_node, dst_node.name, msg)
6703
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
6704
        if msg:
6705
          self.LogWarning("Could not remove snapshot for disk/%d from node"
6706
                          " %s: %s", idx, src_node, msg)
6707

    
6708
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6709
    msg = result.fail_msg
6710
    if msg:
6711
      self.LogWarning("Could not finalize export for instance %s"
6712
                      " on node %s: %s", instance.name, dst_node.name, msg)
6713

    
6714
    nodelist = self.cfg.GetNodeList()
6715
    nodelist.remove(dst_node.name)
6716

    
6717
    # on one-node clusters nodelist will be empty after the removal
6718
    # if we proceed the backup would be removed because OpQueryExports
6719
    # substitutes an empty list with the full cluster node list.
6720
    iname = instance.name
6721
    if nodelist:
6722
      exportlist = self.rpc.call_export_list(nodelist)
6723
      for node in exportlist:
6724
        if exportlist[node].fail_msg:
6725
          continue
6726
        if iname in exportlist[node].payload:
6727
          msg = self.rpc.call_export_remove(node, iname).fail_msg
6728
          if msg:
6729
            self.LogWarning("Could not remove older export for instance %s"
6730
                            " on node %s: %s", iname, node, msg)
6731

    
6732

    
6733
class LURemoveExport(NoHooksLU):
6734
  """Remove exports related to the named instance.
6735

6736
  """
6737
  _OP_REQP = ["instance_name"]
6738
  REQ_BGL = False
6739

    
6740
  def ExpandNames(self):
6741
    self.needed_locks = {}
6742
    # We need all nodes to be locked in order for RemoveExport to work, but we
6743
    # don't need to lock the instance itself, as nothing will happen to it (and
6744
    # we can remove exports also for a removed instance)
6745
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6746

    
6747
  def CheckPrereq(self):
6748
    """Check prerequisites.
6749
    """
6750
    pass
6751

    
6752
  def Exec(self, feedback_fn):
6753
    """Remove any export.
6754

6755
    """
6756
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6757
    # If the instance was not found we'll try with the name that was passed in.
6758
    # This will only work if it was an FQDN, though.
6759
    fqdn_warn = False
6760
    if not instance_name:
6761
      fqdn_warn = True
6762
      instance_name = self.op.instance_name
6763

    
6764
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6765
    exportlist = self.rpc.call_export_list(locked_nodes)
6766
    found = False
6767
    for node in exportlist:
6768
      msg = exportlist[node].fail_msg
6769
      if msg:
6770
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
6771
        continue
6772
      if instance_name in exportlist[node].payload:
6773
        found = True
6774
        result = self.rpc.call_export_remove(node, instance_name)
6775
        msg = result.fail_msg
6776
        if msg:
6777
          logging.error("Could not remove export for instance %s"
6778
                        " on node %s: %s", instance_name, node, msg)
6779

    
6780
    if fqdn_warn and not found:
6781
      feedback_fn("Export not found. If trying to remove an export belonging"
6782
                  " to a deleted instance please use its Fully Qualified"
6783
                  " Domain Name.")
6784

    
6785

    
6786
class TagsLU(NoHooksLU):
6787
  """Generic tags LU.
6788

6789
  This is an abstract class which is the parent of all the other tags LUs.
6790

6791
  """
6792

    
6793
  def ExpandNames(self):
6794
    self.needed_locks = {}
6795
    if self.op.kind == constants.TAG_NODE:
6796
      name = self.cfg.ExpandNodeName(self.op.name)
6797
      if name is None:
6798
        raise errors.OpPrereqError("Invalid node name (%s)" %
6799
                                   (self.op.name,))
6800
      self.op.name = name
6801
      self.needed_locks[locking.LEVEL_NODE] = name
6802
    elif self.op.kind == constants.TAG_INSTANCE:
6803
      name = self.cfg.ExpandInstanceName(self.op.name)
6804
      if name is None:
6805
        raise errors.OpPrereqError("Invalid instance name (%s)" %
6806
                                   (self.op.name,))
6807
      self.op.name = name
6808
      self.needed_locks[locking.LEVEL_INSTANCE] = name
6809

    
6810
  def CheckPrereq(self):
6811
    """Check prerequisites.
6812

6813
    """
6814
    if self.op.kind == constants.TAG_CLUSTER:
6815
      self.target = self.cfg.GetClusterInfo()
6816
    elif self.op.kind == constants.TAG_NODE:
6817
      self.target = self.cfg.GetNodeInfo(self.op.name)
6818
    elif self.op.kind == constants.TAG_INSTANCE:
6819
      self.target = self.cfg.GetInstanceInfo(self.op.name)
6820
    else:
6821
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
6822
                                 str(self.op.kind))
6823

    
6824

    
6825
class LUGetTags(TagsLU):
6826
  """Returns the tags of a given object.
6827

6828
  """
6829
  _OP_REQP = ["kind", "name"]
6830
  REQ_BGL = False
6831

    
6832
  def Exec(self, feedback_fn):
6833
    """Returns the tag list.
6834

6835
    """
6836
    return list(self.target.GetTags())
6837

    
6838

    
6839
class LUSearchTags(NoHooksLU):
6840
  """Searches the tags for a given pattern.
6841

6842
  """
6843
  _OP_REQP = ["pattern"]
6844
  REQ_BGL = False
6845

    
6846
  def ExpandNames(self):
6847
    self.needed_locks = {}
6848

    
6849
  def CheckPrereq(self):
6850
    """Check prerequisites.
6851

6852
    This checks the pattern passed for validity by compiling it.
6853

6854
    """
6855
    try:
6856
      self.re = re.compile(self.op.pattern)
6857
    except re.error, err:
6858
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6859
                                 (self.op.pattern, err))
6860

    
6861
  def Exec(self, feedback_fn):
6862
    """Returns the tag list.
6863

6864
    """
6865
    cfg = self.cfg
6866
    tgts = [("/cluster", cfg.GetClusterInfo())]
6867
    ilist = cfg.GetAllInstancesInfo().values()
6868
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6869
    nlist = cfg.GetAllNodesInfo().values()
6870
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6871
    results = []
6872
    for path, target in tgts:
6873
      for tag in target.GetTags():
6874
        if self.re.search(tag):
6875
          results.append((path, tag))
6876
    return results
6877

    
6878

    
6879
class LUAddTags(TagsLU):
6880
  """Sets a tag on a given object.
6881

6882
  """
6883
  _OP_REQP = ["kind", "name", "tags"]
6884
  REQ_BGL = False
6885

    
6886
  def CheckPrereq(self):
6887
    """Check prerequisites.
6888

6889
    This checks the type and length of the tag name and value.
6890

6891
    """
6892
    TagsLU.CheckPrereq(self)
6893
    for tag in self.op.tags:
6894
      objects.TaggableObject.ValidateTag(tag)
6895

    
6896
  def Exec(self, feedback_fn):
6897
    """Sets the tag.
6898

6899
    """
6900
    try:
6901
      for tag in self.op.tags:
6902
        self.target.AddTag(tag)
6903
    except errors.TagError, err:
6904
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
6905
    try:
6906
      self.cfg.Update(self.target)
6907
    except errors.ConfigurationError:
6908
      raise errors.OpRetryError("There has been a modification to the"
6909
                                " config file and the operation has been"
6910
                                " aborted. Please retry.")
6911

    
6912

    
6913
class LUDelTags(TagsLU):
6914
  """Delete a list of tags from a given object.
6915

6916
  """
6917
  _OP_REQP = ["kind", "name", "tags"]
6918
  REQ_BGL = False
6919

    
6920
  def CheckPrereq(self):
6921
    """Check prerequisites.
6922

6923
    This checks that we have the given tag.
6924

6925
    """
6926
    TagsLU.CheckPrereq(self)
6927
    for tag in self.op.tags:
6928
      objects.TaggableObject.ValidateTag(tag)
6929
    del_tags = frozenset(self.op.tags)
6930
    cur_tags = self.target.GetTags()
6931
    if not del_tags <= cur_tags:
6932
      diff_tags = del_tags - cur_tags
6933
      diff_names = ["'%s'" % tag for tag in diff_tags]
6934
      diff_names.sort()
6935
      raise errors.OpPrereqError("Tag(s) %s not found" %
6936
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
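
  # Illustrative sketch (hypothetical values): an opcode with duration=5.0,
  # on_master=True and on_nodes=["node1.example.com"] makes this LU sleep for
  # five seconds on the master and, through the test_delay RPC call, on
  # node1.example.com as well; a failure on either side aborts the LU with an
  # execution error.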


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _ALLO_KEYS or _RELO_KEYS class
      attribute, depending on the mode, are required)
    - four buffer attributes (in_data, in_text, out_data, out_text), that
      represent the input (to the external script) in data structure and
      text format, and the output from it, again in both formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()
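
  # Minimal construction sketch (hypothetical values; the string parameters
  # are illustrative, not taken from this module): in allocation mode every
  # key listed in _ALLO_KEYS must be passed as a keyword argument, e.g.:
  #
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    "instance1.example.com",
  #                    mem_size=512, disks=[{"size": 1024, "mode": "w"}],
  #                    disk_template="drbd", os="debian-etch", tags=[],
  #                    nics=[{"mac": "auto", "ip": None, "bridge": None}],
  #                    vcpus=1, hypervisor="xen-pvm")
  #
  # Unknown or missing keyword arguments raise ProgrammerError, and the input
  # for the external script is built right away via _BuildInputData().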

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
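
  # Sketch of the overall structure assembled above (the "request" key is
  # added later by _AddNewInstance or _AddRelocateInstance):
  #
  #   {
  #     "version": ...,
  #     "cluster_name": ...,
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodes": {node_name: {"tags": [...], "primary_ip": ...,
  #                           "total_memory": ..., "free_disk": ..., ...}},
  #     "instances": {instance_name: {"tags": [...], "memory": ...,
  #                                   "disk_space_total": ..., ...}},
  #   }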

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request
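
  # Illustrative sketch (hypothetical values): for a net-mirrored disk
  # template the request built above would look roughly like:
  #
  #   {"type": "allocate", "name": "instance1.example.com",
  #    "disk_template": "drbd", "tags": [], "os": "debian-etch", "vcpus": 1,
  #    "memory": 512, "disks": [{"size": 1024, "mode": "w"}],
  #    "disk_space_total": <result of _ComputeDiskSize>, "nics": [...],
  #    "required_nodes": 2}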

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
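
  # Minimal usage sketch ("my-allocator" is a hypothetical script name): after
  # Run() the result attributes filled in by _ValidateResult() below can be
  # inspected directly:
  #
  #   ial.Run("my-allocator")
  #   if not ial.success:
  #     raise errors.OpExecError("Allocation failed: %s" % ial.info)
  #   target_nodes = ial.nodes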

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict
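
  # Illustrative sketch (hypothetical node names): once deserialized, a
  # well-formed reply from the external script must contain at least the
  # three keys checked above, e.g.:
  #
  #   {"success": True, "info": "allocation successful",
  #    "nodes": ["node1.example.com", "node2.example.com"]}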


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
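
  # Note on the direction parameter of the test LU above: IALLOCATOR_DIR_IN
  # only returns the input text that would be fed to the external script,
  # while IALLOCATOR_DIR_OUT actually runs the named allocator and returns
  # its raw output; validation is skipped so that even a malformed reply can
  # be inspected.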