lib/cmdlib.py @ 692738fc

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
46
  """Logical Unit base class.
47

48
  Subclasses must follow these rules:
49
    - implement ExpandNames
50
    - implement CheckPrereq (except when tasklets are used)
51
    - implement Exec (except when tasklets are used)
52
    - implement BuildHooksEnv
53
    - redefine HPATH and HTYPE
54
    - optionally redefine their run requirements:
55
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
56

57
  Note that all commands require root permissions.
58

59
  @ivar dry_run_result: the value (if any) that will be returned to the caller
60
      in dry-run mode (signalled by opcode dry_run parameter)
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92
    self.LogStep = processor.LogStep
93
    # support for dry-run
94
    self.dry_run_result = None
95

    
96
    # Tasklets
97
    self.tasklets = []
98

    
99
    for attr_name in self._OP_REQP:
100
      attr_val = getattr(op, attr_name, None)
101
      if attr_val is None:
102
        raise errors.OpPrereqError("Required parameter '%s' missing" %
103
                                   attr_name)
104

    
105
    self.CheckArguments()
106

    
107
  def __GetSSH(self):
108
    """Returns the SshRunner object
109

110
    """
111
    if not self.__ssh:
112
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
113
    return self.__ssh
114

    
115
  ssh = property(fget=__GetSSH)
116

    
117
  def CheckArguments(self):
118
    """Check syntactic validity for the opcode arguments.
119

120
    This method is for doing a simple syntactic check and to ensure
121
    validity of opcode parameters, without any cluster-related
122
    checks. While the same can be accomplished in ExpandNames and/or
123
    CheckPrereq, doing these separately is better because:
124

125
      - ExpandNames is left as purely a lock-related function
126
      - CheckPrereq is run after we have acquired locks (and possibly
127
        waited for them)
128

129
    The function is allowed to change the self.op attribute so that
130
    later methods need no longer worry about missing parameters.
131

132
    """
133
    pass
134

    
135
  def ExpandNames(self):
136
    """Expand names for this LU.
137

138
    This method is called before starting to execute the opcode, and it should
139
    update all the parameters of the opcode to their canonical form (e.g. a
140
    short node name must be fully expanded after this method has successfully
141
    completed). This way locking, hooks, logging, etc. can work correctly.
142

143
    LUs which implement this method must also populate the self.needed_locks
144
    member, as a dict with lock levels as keys, and a list of needed lock names
145
    as values. Rules:
146

147
      - use an empty dict if you don't need any lock
148
      - if you don't need any lock at a particular level omit that level
149
      - don't put anything for the BGL level
150
      - if you want all locks at a level use locking.ALL_SET as a value
151

152
    If you need to share locks (rather than acquire them exclusively) at one
153
    level you can modify self.share_locks, setting a true value (usually 1) for
154
    that level. By default locks are not shared.
155

156
    This function can also define a list of tasklets, which then will be
157
    executed in order instead of the usual LU-level CheckPrereq and Exec
158
    functions, if those are not defined by the LU.
159

160
    Examples::
161

162
      # Acquire all nodes and one instance
163
      self.needed_locks = {
164
        locking.LEVEL_NODE: locking.ALL_SET,
165
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
166
      }
167
      # Acquire just two nodes
168
      self.needed_locks = {
169
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
170
      }
171
      # Acquire no locks
172
      self.needed_locks = {} # No, you can't leave it to the default value None
173

174
    """
175
    # The implementation of this method is mandatory only if the new LU is
176
    # concurrent, so that old LUs don't need to be changed all at the same
177
    # time.
178
    if self.REQ_BGL:
179
      self.needed_locks = {} # Exclusive LUs don't need locks.
180
    else:
181
      raise NotImplementedError
182

    
183
  def DeclareLocks(self, level):
184
    """Declare LU locking needs for a level
185

186
    While most LUs can just declare their locking needs at ExpandNames time,
187
    sometimes there's the need to calculate some locks after having acquired
188
    the ones before. This function is called just before acquiring locks at a
189
    particular level, but after acquiring the ones at lower levels, and permits
190
    such calculations. It can be used to modify self.needed_locks, and by
191
    default it does nothing.
192

193
    This function is only called if you have something already set in
194
    self.needed_locks for the level.
195

196
    @param level: Locking level which is going to be locked
197
    @type level: member of ganeti.locking.LEVELS
198

199
    """
200

    
201
  def CheckPrereq(self):
202
    """Check prerequisites for this LU.
203

204
    This method should check that the prerequisites for the execution
205
    of this LU are fulfilled. It can do internode communication, but
206
    it should be idempotent - no cluster or system changes are
207
    allowed.
208

209
    The method should raise errors.OpPrereqError in case something is
210
    not fulfilled. Its return value is ignored.
211

212
    This method should also update all the parameters of the opcode to
213
    their canonical form if it hasn't been done by ExpandNames before.
214

215
    """
216
    if self.tasklets:
217
      for tl in self.tasklets:
218
        tl.CheckPrereq()
219
    else:
220
      raise NotImplementedError
221

    
222
  def Exec(self, feedback_fn):
223
    """Execute the LU.
224

225
    This method should implement the actual work. It should raise
226
    errors.OpExecError for failures that are somewhat dealt with in
227
    code, or expected.
228

229
    """
230
    if self.tasklets:
231
      for tl in self.tasklets:
232
        tl.Exec(feedback_fn)
233
    else:
234
      raise NotImplementedError
235

    
236
  def BuildHooksEnv(self):
237
    """Build hooks environment for this LU.
238

239
    This method should return a three-element tuple consisting of: a dict
240
    containing the environment that will be used for running the
241
    specific hook for this LU, a list of node names on which the hook
242
    should run before the execution, and a list of node names on which
243
    the hook should run after the execution.
244

245
    The keys of the dict must not be prefixed with 'GANETI_', as this will
246
    be handled in the hooks runner. Also note additional keys will be
247
    added by the hooks runner. If the LU doesn't define any
248
    environment, an empty dict (and not None) should be returned.
249

250
    If there are no nodes for a phase, an empty list (and not None) should
    be returned.
251

252
    Note that if the HPATH for a LU class is None, this function will
253
    not be called.
254

255
    """
256
    raise NotImplementedError
257

    
258
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
259
    """Notify the LU about the results of its hooks.
260

261
    This method is called every time a hooks phase is executed, and notifies
262
    the Logical Unit about the hooks' result. The LU can then use it to alter
263
    its result based on the hooks.  By default the method does nothing and the
264
    previous result is passed back unchanged, but any LU can override it if it
265
    wants to use the local cluster hook-scripts somehow.
266

267
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
268
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
269
    @param hook_results: the results of the multi-node hooks rpc call
270
    @param feedback_fn: function used to send feedback back to the caller
271
    @param lu_result: the previous Exec result this LU had, or None
272
        in the PRE phase
273
    @return: the new Exec result, based on the previous result
274
        and hook results
275

276
    """
277
    return lu_result
278

    
279
  def _ExpandAndLockInstance(self):
280
    """Helper function to expand and lock an instance.
281

282
    Many LUs that work on an instance take its name in self.op.instance_name
283
    and need to expand it and then declare the expanded name for locking. This
284
    function does it, and then updates self.op.instance_name to the expanded
285
    name. It also initializes needed_locks as a dict, if this hasn't been done
286
    before.
287

288
    """
289
    if self.needed_locks is None:
290
      self.needed_locks = {}
291
    else:
292
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
293
        "_ExpandAndLockInstance called with instance-level locks set"
294
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
295
    if expanded_name is None:
296
      raise errors.OpPrereqError("Instance '%s' not known" %
297
                                  self.op.instance_name)
298
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
299
    self.op.instance_name = expanded_name
300

    
301
  def _LockInstancesNodes(self, primary_only=False):
302
    """Helper function to declare instances' nodes for locking.
303

304
    This function should be called after locking one or more instances to lock
305
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
306
    with all primary or secondary nodes for instances already locked and
307
    present in self.needed_locks[locking.LEVEL_INSTANCE].
308

309
    It should be called from DeclareLocks, and for safety only works if
310
    self.recalculate_locks[locking.LEVEL_NODE] is set.
311

312
    In the future it may grow parameters to just lock some instance's nodes, or
313
    to just lock primaries or secondary nodes, if needed.
314

315
    It should be called in DeclareLocks in a way similar to::
316

317
      if level == locking.LEVEL_NODE:
318
        self._LockInstancesNodes()
319

320
    @type primary_only: boolean
321
    @param primary_only: only lock primary nodes of locked instances
322

323
    """
324
    assert locking.LEVEL_NODE in self.recalculate_locks, \
325
      "_LockInstancesNodes helper function called with no nodes to recalculate"
326

    
327
    # TODO: check if we've really been called with the instance locks held
328

    
329
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
330
    # future we might want to have different behaviors depending on the value
331
    # of self.recalculate_locks[locking.LEVEL_NODE]
332
    wanted_nodes = []
333
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
334
      instance = self.context.cfg.GetInstanceInfo(instance_name)
335
      wanted_nodes.append(instance.primary_node)
336
      if not primary_only:
337
        wanted_nodes.extend(instance.secondary_nodes)
338

    
339
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
340
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
341
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
342
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
343

    
344
    del self.recalculate_locks[locking.LEVEL_NODE]
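
# Editor's illustration, not part of the original file: a minimal sketch of a
# hypothetical concurrent LU (the class name and the "instance_name" opcode
# field are made up) showing how the pieces documented above fit together:
# _OP_REQP, ExpandNames and DeclareLocks filling self.needed_locks, and the
# _ExpandAndLockInstance/_LockInstancesNodes helpers.
class _LUExampleSketch(LogicalUnit):
  """Hypothetical LU used only to illustrate the base class contract."""
  HPATH = None
  HTYPE = None
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # expand self.op.instance_name and lock it at the instance level
    self._ExpandAndLockInstance()
    # node locks are computed later, in DeclareLocks, once the instance-level
    # lock is held; LOCKS_REPLACE tells _LockInstancesNodes how to update them
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    # idempotent checks only; problems are signalled via errors.OpPrereqError
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Instance %s runs on node %s" %
                (self.instance.name, self.instance.primary_node))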
345

    
346

    
347
class NoHooksLU(LogicalUnit):
348
  """Simple LU which runs no hooks.
349

350
  This LU is intended as a parent for other LogicalUnits which will
351
  run no hooks, in order to reduce duplicate code.
352

353
  """
354
  HPATH = None
355
  HTYPE = None
356

    
357

    
358
class Tasklet:
359
  """Tasklet base class.
360

361
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
362
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
363
  tasklets know nothing about locks.
364

365
  Subclasses must follow these rules:
366
    - Implement CheckPrereq
367
    - Implement Exec
368

369
  """
370
  def CheckPrereq(self):
371
    """Check prerequisites for this tasklets.
372

373
    This method should check whether the prerequisites for the execution of
374
    this tasklet are fulfilled. It can do internode communication, but it
375
    should be idempotent - no cluster or system changes are allowed.
376

377
    The method should raise errors.OpPrereqError in case something is not
378
    fulfilled. Its return value is ignored.
379

380
    This method should also update all parameters to their canonical form if it
381
    hasn't been done before.
382

383
    """
384
    raise NotImplementedError
385

    
386
  def Exec(self, feedback_fn):
387
    """Execute the tasklet.
388

389
    This method should implement the actual work. It should raise
390
    errors.OpExecError for failures that are somewhat dealt with in code, or
391
    expected.
392

393
    """
394
    raise NotImplementedError
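
# Editor's illustration, not part of the original file: a minimal hypothetical
# tasklet (class and attribute names are made up). An LU would typically build
# it in ExpandNames, e.g. self.tasklets = [_ExampleTasklet(self, name)], after
# which the base LogicalUnit.CheckPrereq and Exec above run it automatically;
# any locks the tasklet needs must be declared by the owning LU.
class _ExampleTasklet(Tasklet):
  """Hypothetical tasklet used only for illustration."""
  def __init__(self, lu, instance_name):
    self.lu = lu
    self.instance_name = instance_name

  def CheckPrereq(self):
    # idempotent check, mirroring the contract documented above
    if self.lu.cfg.ExpandInstanceName(self.instance_name) is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Would operate on instance %s" % self.instance_name)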
395

    
396

    
397
def _GetWantedNodes(lu, nodes):
398
  """Returns list of checked and expanded node names.
399

400
  @type lu: L{LogicalUnit}
401
  @param lu: the logical unit on whose behalf we execute
402
  @type nodes: list
403
  @param nodes: list of node names or None for all nodes
404
  @rtype: list
405
  @return: the list of nodes, sorted
406
  @raise errors.OpPrereqError: if the nodes parameter is wrong type
  @raise errors.ProgrammerError: if the nodes parameter is empty
407

408
  """
409
  if not isinstance(nodes, list):
410
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
411

    
412
  if not nodes:
413
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
414
      " non-empty list of nodes whose name is to be expanded.")
415

    
416
  wanted = []
417
  for name in nodes:
418
    node = lu.cfg.ExpandNodeName(name)
419
    if node is None:
420
      raise errors.OpPrereqError("No such node name '%s'" % name)
421
    wanted.append(node)
422

    
423
  return utils.NiceSort(wanted)
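
# Editor's note, an illustrative sketch only: an LU accepting an optional list
# of node names (passed in here as "op_nodes") could canonicalize it along
# these lines, falling back to all cluster nodes when none were given.
def _ExampleExpandNodeArgument(lu, op_nodes):
  """Expand a user-supplied node list (hypothetical helper)."""
  if op_nodes:
    return _GetWantedNodes(lu, op_nodes)
  return utils.NiceSort(lu.cfg.GetNodeList())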
424

    
425

    
426
def _GetWantedInstances(lu, instances):
427
  """Returns list of checked and expanded instance names.
428

429
  @type lu: L{LogicalUnit}
430
  @param lu: the logical unit on whose behalf we execute
431
  @type instances: list
432
  @param instances: list of instance names or None for all instances
433
  @rtype: list
434
  @return: the list of instances, sorted
435
  @raise errors.OpPrereqError: if the instances parameter is wrong type
436
  @raise errors.OpPrereqError: if any of the passed instances is not found
437

438
  """
439
  if not isinstance(instances, list):
440
    raise errors.OpPrereqError("Invalid argument type 'instances'")
441

    
442
  if instances:
443
    wanted = []
444

    
445
    for name in instances:
446
      instance = lu.cfg.ExpandInstanceName(name)
447
      if instance is None:
448
        raise errors.OpPrereqError("No such instance name '%s'" % name)
449
      wanted.append(instance)
450

    
451
  else:
452
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
453
  return wanted
454

    
455

    
456
def _CheckOutputFields(static, dynamic, selected):
457
  """Checks whether all selected fields are valid.
458

459
  @type static: L{utils.FieldSet}
460
  @param static: static fields set
461
  @type dynamic: L{utils.FieldSet}
462
  @param dynamic: dynamic fields set
463

464
  """
465
  f = utils.FieldSet()
466
  f.Extend(static)
467
  f.Extend(dynamic)
468

    
469
  delta = f.NonMatching(selected)
470
  if delta:
471
    raise errors.OpPrereqError("Unknown output fields selected: %s"
472
                               % ",".join(delta))
473

    
474

    
475
def _CheckBooleanOpField(op, name):
476
  """Validates boolean opcode parameters.
477

478
  This will ensure that an opcode parameter is either a boolean value,
479
  or None (but that it always exists).
480

481
  """
482
  val = getattr(op, name, None)
483
  if not (val is None or isinstance(val, bool)):
484
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
485
                               (name, str(val)))
486
  setattr(op, name, val)
487

    
488

    
489
def _CheckNodeOnline(lu, node):
490
  """Ensure that a given node is online.
491

492
  @param lu: the LU on behalf of which we make the check
493
  @param node: the node to check
494
  @raise errors.OpPrereqError: if the node is offline
495

496
  """
497
  if lu.cfg.GetNodeInfo(node).offline:
498
    raise errors.OpPrereqError("Can't use offline node %s" % node)
499

    
500

    
501
def _CheckNodeNotDrained(lu, node):
502
  """Ensure that a given node is not drained.
503

504
  @param lu: the LU on behalf of which we make the check
505
  @param node: the node to check
506
  @raise errors.OpPrereqError: if the node is drained
507

508
  """
509
  if lu.cfg.GetNodeInfo(node).drained:
510
    raise errors.OpPrereqError("Can't use drained node %s" % node)
511

    
512

    
513
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
514
                          memory, vcpus, nics, disk_template, disks,
515
                          bep, hvp, hypervisor_name):
516
  """Builds instance related env variables for hooks
517

518
  This builds the hook environment from individual variables.
519

520
  @type name: string
521
  @param name: the name of the instance
522
  @type primary_node: string
523
  @param primary_node: the name of the instance's primary node
524
  @type secondary_nodes: list
525
  @param secondary_nodes: list of secondary nodes as strings
526
  @type os_type: string
527
  @param os_type: the name of the instance's OS
528
  @type status: boolean
529
  @param status: the should_run status of the instance
530
  @type memory: string
531
  @param memory: the memory size of the instance
532
  @type vcpus: string
533
  @param vcpus: the count of VCPUs the instance has
534
  @type nics: list
535
  @param nics: list of tuples (ip, mac, mode, link) representing
536
      the NICs the instance has
537
  @type disk_template: string
538
  @param disk_template: the disk template of the instance
539
  @type disks: list
540
  @param disks: the list of (size, mode) pairs
541
  @type bep: dict
542
  @param bep: the backend parameters for the instance
543
  @type hvp: dict
544
  @param hvp: the hypervisor parameters for the instance
545
  @type hypervisor_name: string
546
  @param hypervisor_name: the hypervisor for the instance
547
  @rtype: dict
548
  @return: the hook environment for this instance
549

550
  """
551
  if status:
552
    str_status = "up"
553
  else:
554
    str_status = "down"
555
  env = {
556
    "OP_TARGET": name,
557
    "INSTANCE_NAME": name,
558
    "INSTANCE_PRIMARY": primary_node,
559
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
560
    "INSTANCE_OS_TYPE": os_type,
561
    "INSTANCE_STATUS": str_status,
562
    "INSTANCE_MEMORY": memory,
563
    "INSTANCE_VCPUS": vcpus,
564
    "INSTANCE_DISK_TEMPLATE": disk_template,
565
    "INSTANCE_HYPERVISOR": hypervisor_name,
566
  }
567

    
568
  if nics:
569
    nic_count = len(nics)
570
    for idx, (ip, mac, mode, link) in enumerate(nics):
571
      if ip is None:
572
        ip = ""
573
      env["INSTANCE_NIC%d_IP" % idx] = ip
574
      env["INSTANCE_NIC%d_MAC" % idx] = mac
575
      env["INSTANCE_NIC%d_MODE" % idx] = mode
576
      env["INSTANCE_NIC%d_LINK" % idx] = link
577
      if mode == constants.NIC_MODE_BRIDGED:
578
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
579
  else:
580
    nic_count = 0
581

    
582
  env["INSTANCE_NIC_COUNT"] = nic_count
583

    
584
  if disks:
585
    disk_count = len(disks)
586
    for idx, (size, mode) in enumerate(disks):
587
      env["INSTANCE_DISK%d_SIZE" % idx] = size
588
      env["INSTANCE_DISK%d_MODE" % idx] = mode
589
  else:
590
    disk_count = 0
591

    
592
  env["INSTANCE_DISK_COUNT"] = disk_count
593

    
594
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
595
    for key, value in source.items():
596
      env["INSTANCE_%s_%s" % (kind, key)] = value
597

    
598
  return env
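
# Editor's illustration with made-up values: a direct call to
# _BuildInstanceHookEnv for a single-NIC, single-disk DRBD instance. The
# resulting dict contains keys such as INSTANCE_NAME, INSTANCE_NIC0_MAC and
# INSTANCE_DISK0_SIZE, plus one INSTANCE_BE_*/INSTANCE_HV_* entry for each
# backend and hypervisor parameter passed in via bep and hvp.
def _ExampleInstanceHookEnv():
  """Build a sample hook environment (editor's sketch only)."""
  nics = [("192.0.2.10", "aa:00:00:11:22:33",
           constants.NIC_MODE_BRIDGED, "xen-br0")]
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                               ["node2.example.com"], "debian-etch", True,
                               128, 1, nics, constants.DT_DRBD8,
                               [(10240, "rw")], {"auto_balance": True},
                               {"kernel_path": "/boot/vmlinuz"}, "xen-pvm")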
599

    
600
def _NICListToTuple(lu, nics):
601
  """Build a list of nic information tuples.
602

603
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
604
  value in LUQueryInstanceData.
605

606
  @type lu:  L{LogicalUnit}
607
  @param lu: the logical unit on whose behalf we execute
608
  @type nics: list of L{objects.NIC}
609
  @param nics: list of nics to convert to hooks tuples
610

611
  """
612
  hooks_nics = []
613
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
614
  for nic in nics:
615
    ip = nic.ip
616
    mac = nic.mac
617
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
618
    mode = filled_params[constants.NIC_MODE]
619
    link = filled_params[constants.NIC_LINK]
620
    hooks_nics.append((ip, mac, mode, link))
621
  return hooks_nics
622

    
623
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
624
  """Builds instance related env variables for hooks from an object.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type instance: L{objects.Instance}
629
  @param instance: the instance for which we should build the
630
      environment
631
  @type override: dict
632
  @param override: dictionary with key/values that will override
633
      our values
634
  @rtype: dict
635
  @return: the hook environment dictionary
636

637
  """
638
  cluster = lu.cfg.GetClusterInfo()
639
  bep = cluster.FillBE(instance)
640
  hvp = cluster.FillHV(instance)
641
  args = {
642
    'name': instance.name,
643
    'primary_node': instance.primary_node,
644
    'secondary_nodes': instance.secondary_nodes,
645
    'os_type': instance.os,
646
    'status': instance.admin_up,
647
    'memory': bep[constants.BE_MEMORY],
648
    'vcpus': bep[constants.BE_VCPUS],
649
    'nics': _NICListToTuple(lu, instance.nics),
650
    'disk_template': instance.disk_template,
651
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
652
    'bep': bep,
653
    'hvp': hvp,
654
    'hypervisor_name': instance.hypervisor,
655
  }
656
  if override:
657
    args.update(override)
658
  return _BuildInstanceHookEnv(**args)
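
# Editor's note, a hypothetical sketch: in an instance-related LU the
# BuildHooksEnv method usually reduces to something like the function below,
# with the node lists picked to match where the hooks should run. Note that
# the override argument replaces individual _BuildInstanceHookEnv parameters
# (e.g. "memory"), not arbitrary environment keys.
def _ExampleBuildHooksEnvForInstance(lu, instance):
  """Sample BuildHooksEnv body for an instance LU (illustration only)."""
  env = _BuildInstanceHookEnvByObject(lu, instance, override={"memory": 256})
  nl = [lu.cfg.GetMasterNode(), instance.primary_node]
  return env, nl, nl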
659

    
660

    
661
def _AdjustCandidatePool(lu):
662
  """Adjust the candidate pool after node operations.
663

664
  """
665
  mod_list = lu.cfg.MaintainCandidatePool()
666
  if mod_list:
667
    lu.LogInfo("Promoted nodes to master candidate role: %s",
668
               ", ".join(node.name for node in mod_list))
669
    for name in mod_list:
670
      lu.context.ReaddNode(name)
671
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
672
  if mc_now > mc_max:
673
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
674
               (mc_now, mc_max))
675

    
676

    
677
def _CheckNicsBridgesExist(lu, target_nics, target_node,
678
                               profile=constants.PP_DEFAULT):
679
  """Check that the brigdes needed by a list of nics exist.
680

681
  """
682
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
683
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
684
                for nic in target_nics]
685
  brlist = [params[constants.NIC_LINK] for params in paramslist
686
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
687
  if brlist:
688
    result = lu.rpc.call_bridges_exist(target_node, brlist)
689
    result.Raise("Error checking bridges on destination node '%s'" %
690
                 target_node, prereq=True)
691

    
692

    
693
def _CheckInstanceBridgesExist(lu, instance, node=None):
694
  """Check that the brigdes needed by an instance exist.
695

696
  """
697
  if node is None:
698
    node = instance.primary_node
699
  _CheckNicsBridgesExist(lu, instance.nics, node)
700

    
701

    
702
def _GetNodeSecondaryInstances(cfg, node_name):
703
  """Returns secondary instances on a node.
704

705
  """
706
  instances = []
707

    
708
  for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
709
    if node_name in inst.secondary_nodes:
710
      instances.append(inst)
711

    
712
  return instances
713

    
714

    
715
class LUDestroyCluster(NoHooksLU):
716
  """Logical unit for destroying the cluster.
717

718
  """
719
  _OP_REQP = []
720

    
721
  def CheckPrereq(self):
722
    """Check prerequisites.
723

724
    This checks whether the cluster is empty.
725

726
    Any errors are signaled by raising errors.OpPrereqError.
727

728
    """
729
    master = self.cfg.GetMasterNode()
730

    
731
    nodelist = self.cfg.GetNodeList()
732
    if len(nodelist) != 1 or nodelist[0] != master:
733
      raise errors.OpPrereqError("There are still %d node(s) in"
734
                                 " this cluster." % (len(nodelist) - 1))
735
    instancelist = self.cfg.GetInstanceList()
736
    if instancelist:
737
      raise errors.OpPrereqError("There are still %d instance(s) in"
738
                                 " this cluster." % len(instancelist))
739

    
740
  def Exec(self, feedback_fn):
741
    """Destroys the cluster.
742

743
    """
744
    master = self.cfg.GetMasterNode()
745
    result = self.rpc.call_node_stop_master(master, False)
746
    result.Raise("Could not disable the master role")
747
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
748
    utils.CreateBackup(priv_key)
749
    utils.CreateBackup(pub_key)
750
    return master
751

    
752

    
753
class LUVerifyCluster(LogicalUnit):
754
  """Verifies the cluster status.
755

756
  """
757
  HPATH = "cluster-verify"
758
  HTYPE = constants.HTYPE_CLUSTER
759
  _OP_REQP = ["skip_checks"]
760
  REQ_BGL = False
761

    
762
  def ExpandNames(self):
763
    self.needed_locks = {
764
      locking.LEVEL_NODE: locking.ALL_SET,
765
      locking.LEVEL_INSTANCE: locking.ALL_SET,
766
    }
767
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
768

    
769
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
770
                  node_result, feedback_fn, master_files,
771
                  drbd_map, vg_name):
772
    """Run multiple tests against a node.
773

774
    Test list:
775

776
      - compares ganeti version
777
      - checks vg existence and size > 20G
778
      - checks config file checksum
779
      - checks ssh to other nodes
780

781
    @type nodeinfo: L{objects.Node}
782
    @param nodeinfo: the node to check
783
    @param file_list: required list of files
784
    @param local_cksum: dictionary of local files and their checksums
785
    @param node_result: the results from the node
786
    @param feedback_fn: function used to accumulate results
787
    @param master_files: list of files that only masters should have
788
    @param drbd_map: the used DRBD minors for this node, in
789
        form of minor: (instance, must_exist) which correspond to instances
790
        and their running status
791
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
792

793
    """
794
    node = nodeinfo.name
795

    
796
    # main result, node_result should be a non-empty dict
797
    if not node_result or not isinstance(node_result, dict):
798
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
799
      return True
800

    
801
    # compares ganeti version
802
    local_version = constants.PROTOCOL_VERSION
803
    remote_version = node_result.get('version', None)
804
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
805
            len(remote_version) == 2):
806
      feedback_fn("  - ERROR: connection to %s failed" % (node))
807
      return True
808

    
809
    if local_version != remote_version[0]:
810
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
811
                  " node %s %s" % (local_version, node, remote_version[0]))
812
      return True
813

    
814
    # node seems compatible, we can actually try to look into its results
815

    
816
    bad = False
817

    
818
    # full package version
819
    if constants.RELEASE_VERSION != remote_version[1]:
820
      feedback_fn("  - WARNING: software version mismatch: master %s,"
821
                  " node %s %s" %
822
                  (constants.RELEASE_VERSION, node, remote_version[1]))
823

    
824
    # checks vg existence and size > 20G
825
    if vg_name is not None:
826
      vglist = node_result.get(constants.NV_VGLIST, None)
827
      if not vglist:
828
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
829
                        (node,))
830
        bad = True
831
      else:
832
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
833
                                              constants.MIN_VG_SIZE)
834
        if vgstatus:
835
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
836
          bad = True
837

    
838
    # checks config file checksum
839

    
840
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
841
    if not isinstance(remote_cksum, dict):
842
      bad = True
843
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
844
    else:
845
      for file_name in file_list:
846
        node_is_mc = nodeinfo.master_candidate
847
        must_have_file = file_name not in master_files
848
        if file_name not in remote_cksum:
849
          if node_is_mc or must_have_file:
850
            bad = True
851
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
852
        elif remote_cksum[file_name] != local_cksum[file_name]:
853
          if node_is_mc or must_have_file:
854
            bad = True
855
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
856
          else:
857
            # not candidate and this is not a must-have file
858
            bad = True
859
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
860
                        " candidates (and the file is outdated)" % file_name)
861
        else:
862
          # all good, except non-master/non-must have combination
863
          if not node_is_mc and not must_have_file:
864
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
865
                        " candidates" % file_name)
866

    
867
    # checks ssh to any
868

    
869
    if constants.NV_NODELIST not in node_result:
870
      bad = True
871
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
872
    else:
873
      if node_result[constants.NV_NODELIST]:
874
        bad = True
875
        for node in node_result[constants.NV_NODELIST]:
876
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
877
                          (node, node_result[constants.NV_NODELIST][node]))
878

    
879
    if constants.NV_NODENETTEST not in node_result:
880
      bad = True
881
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
882
    else:
883
      if node_result[constants.NV_NODENETTEST]:
884
        bad = True
885
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
886
        for node in nlist:
887
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
888
                          (node, node_result[constants.NV_NODENETTEST][node]))
889

    
890
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
891
    if isinstance(hyp_result, dict):
892
      for hv_name, hv_result in hyp_result.iteritems():
893
        if hv_result is not None:
894
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
895
                      (hv_name, hv_result))
896

    
897
    # check used drbd list
898
    if vg_name is not None:
899
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
900
      if not isinstance(used_minors, (tuple, list)):
901
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
902
                    str(used_minors))
903
      else:
904
        for minor, (iname, must_exist) in drbd_map.items():
905
          if minor not in used_minors and must_exist:
906
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
907
                        " not active" % (minor, iname))
908
            bad = True
909
        for minor in used_minors:
910
          if minor not in drbd_map:
911
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
912
                        minor)
913
            bad = True
914

    
915
    return bad
916

    
917
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
918
                      node_instance, feedback_fn, n_offline):
919
    """Verify an instance.
920

921
    This function checks to see if the required block devices are
922
    available on the instance's node.
923

924
    """
925
    bad = False
926

    
927
    node_current = instanceconfig.primary_node
928

    
929
    node_vol_should = {}
930
    instanceconfig.MapLVsByNode(node_vol_should)
931

    
932
    for node in node_vol_should:
933
      if node in n_offline:
934
        # ignore missing volumes on offline nodes
935
        continue
936
      for volume in node_vol_should[node]:
937
        if node not in node_vol_is or volume not in node_vol_is[node]:
938
          feedback_fn("  - ERROR: volume %s missing on node %s" %
939
                          (volume, node))
940
          bad = True
941

    
942
    if instanceconfig.admin_up:
943
      if ((node_current not in node_instance or
944
          not instance in node_instance[node_current]) and
945
          node_current not in n_offline):
946
        feedback_fn("  - ERROR: instance %s not running on node %s" %
947
                        (instance, node_current))
948
        bad = True
949

    
950
    for node in node_instance:
951
      if (not node == node_current):
952
        if instance in node_instance[node]:
953
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
954
                          (instance, node))
955
          bad = True
956

    
957
    return bad
958

    
959
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
960
    """Verify if there are any unknown volumes in the cluster.
961

962
    The .os, .swap and backup volumes are ignored. All other volumes are
963
    reported as unknown.
964

965
    """
966
    bad = False
967

    
968
    for node in node_vol_is:
969
      for volume in node_vol_is[node]:
970
        if node not in node_vol_should or volume not in node_vol_should[node]:
971
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
972
                      (volume, node))
973
          bad = True
974
    return bad
975

    
976
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
977
    """Verify the list of running instances.
978

979
    This checks what instances are running but unknown to the cluster.
980

981
    """
982
    bad = False
983
    for node in node_instance:
984
      for runninginstance in node_instance[node]:
985
        if runninginstance not in instancelist:
986
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
987
                          (runninginstance, node))
988
          bad = True
989
    return bad
990

    
991
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
992
    """Verify N+1 Memory Resilience.
993

994
    Check that if one single node dies we can still start all the instances it
995
    was primary for.
996

997
    """
998
    bad = False
999

    
1000
    for node, nodeinfo in node_info.iteritems():
1001
      # This code checks that every node which is now listed as secondary has
1002
      # enough memory to host all the instances it is supposed to host,
1003
      # should a single other node in the cluster fail.
1004
      # FIXME: not ready for failover to an arbitrary node
1005
      # FIXME: does not support file-backed instances
1006
      # WARNING: we currently take into account down instances as well as up
1007
      # ones, considering that even if they're down someone might want to start
1008
      # them even in the event of a node failure.
1009
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1010
        needed_mem = 0
1011
        for instance in instances:
1012
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1013
          if bep[constants.BE_AUTO_BALANCE]:
1014
            needed_mem += bep[constants.BE_MEMORY]
1015
        if nodeinfo['mfree'] < needed_mem:
1016
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
1017
                      " failovers should node %s fail" % (node, prinode))
1018
          bad = True
1019
    return bad
1020

    
1021
  def CheckPrereq(self):
1022
    """Check prerequisites.
1023

1024
    Transform the list of checks we're going to skip into a set and check that
1025
    all its members are valid.
1026

1027
    """
1028
    self.skip_set = frozenset(self.op.skip_checks)
1029
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1030
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1031

    
1032
  def BuildHooksEnv(self):
1033
    """Build hooks env.
1034

1035
    Cluster-Verify hooks are run only in the post phase; if they fail, their
1036
    output is logged in the verify output and the verification fails.
1037

1038
    """
1039
    all_nodes = self.cfg.GetNodeList()
1040
    env = {
1041
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1042
      }
1043
    for node in self.cfg.GetAllNodesInfo().values():
1044
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1045

    
1046
    return env, [], all_nodes
1047

    
1048
  def Exec(self, feedback_fn):
1049
    """Verify integrity of cluster, performing various test on nodes.
1050

1051
    """
1052
    bad = False
1053
    feedback_fn("* Verifying global settings")
1054
    for msg in self.cfg.VerifyConfig():
1055
      feedback_fn("  - ERROR: %s" % msg)
1056

    
1057
    vg_name = self.cfg.GetVGName()
1058
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1059
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1060
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1061
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1062
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1063
                        for iname in instancelist)
1064
    i_non_redundant = [] # Non redundant instances
1065
    i_non_a_balanced = [] # Non auto-balanced instances
1066
    n_offline = [] # List of offline nodes
1067
    n_drained = [] # List of nodes being drained
1068
    node_volume = {}
1069
    node_instance = {}
1070
    node_info = {}
1071
    instance_cfg = {}
1072

    
1073
    # FIXME: verify OS list
1074
    # do local checksums
1075
    master_files = [constants.CLUSTER_CONF_FILE]
1076

    
1077
    file_names = ssconf.SimpleStore().GetFileList()
1078
    file_names.append(constants.SSL_CERT_FILE)
1079
    file_names.append(constants.RAPI_CERT_FILE)
1080
    file_names.extend(master_files)
1081

    
1082
    local_checksums = utils.FingerprintFiles(file_names)
1083

    
1084
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1085
    node_verify_param = {
1086
      constants.NV_FILELIST: file_names,
1087
      constants.NV_NODELIST: [node.name for node in nodeinfo
1088
                              if not node.offline],
1089
      constants.NV_HYPERVISOR: hypervisors,
1090
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1091
                                  node.secondary_ip) for node in nodeinfo
1092
                                 if not node.offline],
1093
      constants.NV_INSTANCELIST: hypervisors,
1094
      constants.NV_VERSION: None,
1095
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1096
      }
1097
    if vg_name is not None:
1098
      node_verify_param[constants.NV_VGLIST] = None
1099
      node_verify_param[constants.NV_LVLIST] = vg_name
1100
      node_verify_param[constants.NV_DRBDLIST] = None
1101
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1102
                                           self.cfg.GetClusterName())
1103

    
1104
    cluster = self.cfg.GetClusterInfo()
1105
    master_node = self.cfg.GetMasterNode()
1106
    all_drbd_map = self.cfg.ComputeDRBDMap()
1107

    
1108
    for node_i in nodeinfo:
1109
      node = node_i.name
1110

    
1111
      if node_i.offline:
1112
        feedback_fn("* Skipping offline node %s" % (node,))
1113
        n_offline.append(node)
1114
        continue
1115

    
1116
      if node == master_node:
1117
        ntype = "master"
1118
      elif node_i.master_candidate:
1119
        ntype = "master candidate"
1120
      elif node_i.drained:
1121
        ntype = "drained"
1122
        n_drained.append(node)
1123
      else:
1124
        ntype = "regular"
1125
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1126

    
1127
      msg = all_nvinfo[node].fail_msg
1128
      if msg:
1129
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1130
        bad = True
1131
        continue
1132

    
1133
      nresult = all_nvinfo[node].payload
1134
      node_drbd = {}
1135
      for minor, instance in all_drbd_map[node].items():
1136
        if instance not in instanceinfo:
1137
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1138
                      instance)
1139
          # ghost instance should not be running, but otherwise we
1140
          # don't give double warnings (both ghost instance and
1141
          # unallocated minor in use)
1142
          node_drbd[minor] = (instance, False)
1143
        else:
1144
          instance = instanceinfo[instance]
1145
          node_drbd[minor] = (instance.name, instance.admin_up)
1146
      result = self._VerifyNode(node_i, file_names, local_checksums,
1147
                                nresult, feedback_fn, master_files,
1148
                                node_drbd, vg_name)
1149
      bad = bad or result
1150

    
1151
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1152
      if vg_name is None:
1153
        node_volume[node] = {}
1154
      elif isinstance(lvdata, basestring):
1155
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1156
                    (node, utils.SafeEncode(lvdata)))
1157
        bad = True
1158
        node_volume[node] = {}
1159
      elif not isinstance(lvdata, dict):
1160
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1161
        bad = True
1162
        continue
1163
      else:
1164
        node_volume[node] = lvdata
1165

    
1166
      # node_instance
1167
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1168
      if not isinstance(idata, list):
1169
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1170
                    (node,))
1171
        bad = True
1172
        continue
1173

    
1174
      node_instance[node] = idata
1175

    
1176
      # node_info
1177
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1178
      if not isinstance(nodeinfo, dict):
1179
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1180
        bad = True
1181
        continue
1182

    
1183
      try:
1184
        node_info[node] = {
1185
          "mfree": int(nodeinfo['memory_free']),
1186
          "pinst": [],
1187
          "sinst": [],
1188
          # dictionary holding all instances this node is secondary for,
1189
          # grouped by their primary node. Each key is a cluster node, and each
1190
          # value is a list of instances which have the key as primary and the
1191
          # current node as secondary.  this is handy to calculate N+1 memory
1192
          # availability if you can only failover from a primary to its
1193
          # secondary.
1194
          "sinst-by-pnode": {},
1195
        }
1196
        # FIXME: devise a free space model for file based instances as well
1197
        if vg_name is not None:
1198
          if (constants.NV_VGLIST not in nresult or
1199
              vg_name not in nresult[constants.NV_VGLIST]):
1200
            feedback_fn("  - ERROR: node %s didn't return data for the"
1201
                        " volume group '%s' - it is either missing or broken" %
1202
                        (node, vg_name))
1203
            bad = True
1204
            continue
1205
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1206
      except (ValueError, KeyError):
1207
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1208
                    " from node %s" % (node,))
1209
        bad = True
1210
        continue
1211

    
1212
    node_vol_should = {}
1213

    
1214
    for instance in instancelist:
1215
      feedback_fn("* Verifying instance %s" % instance)
1216
      inst_config = instanceinfo[instance]
1217
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1218
                                     node_instance, feedback_fn, n_offline)
1219
      bad = bad or result
1220
      inst_nodes_offline = []
1221

    
1222
      inst_config.MapLVsByNode(node_vol_should)
1223

    
1224
      instance_cfg[instance] = inst_config
1225

    
1226
      pnode = inst_config.primary_node
1227
      if pnode in node_info:
1228
        node_info[pnode]['pinst'].append(instance)
1229
      elif pnode not in n_offline:
1230
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1231
                    " %s failed" % (instance, pnode))
1232
        bad = True
1233

    
1234
      if pnode in n_offline:
1235
        inst_nodes_offline.append(pnode)
1236

    
1237
      # If the instance is non-redundant we cannot survive losing its primary
1238
      # node, so we are not N+1 compliant. On the other hand we have no disk
1239
      # templates with more than one secondary so that situation is not well
1240
      # supported either.
1241
      # FIXME: does not support file-backed instances
1242
      if len(inst_config.secondary_nodes) == 0:
1243
        i_non_redundant.append(instance)
1244
      elif len(inst_config.secondary_nodes) > 1:
1245
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1246
                    % instance)
1247

    
1248
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1249
        i_non_a_balanced.append(instance)
1250

    
1251
      for snode in inst_config.secondary_nodes:
1252
        if snode in node_info:
1253
          node_info[snode]['sinst'].append(instance)
1254
          if pnode not in node_info[snode]['sinst-by-pnode']:
1255
            node_info[snode]['sinst-by-pnode'][pnode] = []
1256
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1257
        elif snode not in n_offline:
1258
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1259
                      " %s failed" % (instance, snode))
1260
          bad = True
1261
        if snode in n_offline:
1262
          inst_nodes_offline.append(snode)
1263

    
1264
      if inst_nodes_offline:
1265
        # warn that the instance lives on offline nodes, and set bad=True
1266
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1267
                    ", ".join(inst_nodes_offline))
1268
        bad = True
1269

    
1270
    feedback_fn("* Verifying orphan volumes")
1271
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1272
                                       feedback_fn)
1273
    bad = bad or result
1274

    
1275
    feedback_fn("* Verifying remaining instances")
1276
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1277
                                         feedback_fn)
1278
    bad = bad or result
1279

    
1280
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1281
      feedback_fn("* Verifying N+1 Memory redundancy")
1282
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1283
      bad = bad or result
1284

    
1285
    feedback_fn("* Other Notes")
1286
    if i_non_redundant:
1287
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1288
                  % len(i_non_redundant))
1289

    
1290
    if i_non_a_balanced:
1291
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1292
                  % len(i_non_a_balanced))
1293

    
1294
    if n_offline:
1295
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1296

    
1297
    if n_drained:
1298
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1299

    
1300
    return not bad
1301

    
1302
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1303
    """Analyze the post-hooks' result
1304

1305
    This method analyses the hook result, handles it, and sends some
1306
    nicely-formatted feedback back to the user.
1307

1308
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1309
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1310
    @param hooks_results: the results of the multi-node hooks rpc call
1311
    @param feedback_fn: function used to send feedback back to the caller
1312
    @param lu_result: previous Exec result
1313
    @return: the new Exec result, based on the previous result
1314
        and hook results
1315

1316
    """
1317
    # We only really run POST phase hooks, and are only interested in
1318
    # their results
1319
    if phase == constants.HOOKS_PHASE_POST:
1320
      # Used to change hooks' output to proper indentation
1321
      indent_re = re.compile('^', re.M)
1322
      feedback_fn("* Hooks Results")
1323
      if not hooks_results:
1324
        feedback_fn("  - ERROR: general communication failure")
1325
        lu_result = 1
1326
      else:
1327
        for node_name in hooks_results:
1328
          show_node_header = True
1329
          res = hooks_results[node_name]
1330
          msg = res.fail_msg
1331
          if msg:
1332
            if res.offline:
1333
              # no need to warn or set fail return value
1334
              continue
1335
            feedback_fn("    Communication failure in hooks execution: %s" %
1336
                        msg)
1337
            lu_result = 1
1338
            continue
1339
          for script, hkr, output in res.payload:
1340
            if hkr == constants.HKR_FAIL:
1341
              # The node header is only shown once, if there are
1342
              # failing hooks on that node
1343
              if show_node_header:
1344
                feedback_fn("  Node %s:" % node_name)
1345
                show_node_header = False
1346
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1347
              output = indent_re.sub('      ', output)
1348
              feedback_fn("%s" % output)
1349
              lu_result = 1
1350

    
1351
      return lu_result
1352

    
1353

    
1354
class LUVerifyDisks(NoHooksLU):
1355
  """Verifies the cluster disks status.
1356

1357
  """
1358
  _OP_REQP = []
1359
  REQ_BGL = False
1360

    
1361
  def ExpandNames(self):
1362
    self.needed_locks = {
1363
      locking.LEVEL_NODE: locking.ALL_SET,
1364
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1365
    }
1366
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1367

    
1368
  def CheckPrereq(self):
1369
    """Check prerequisites.
1370

1371
    This has no prerequisites.
1372

1373
    """
1374
    pass
1375

    
1376
  def Exec(self, feedback_fn):
1377
    """Verify integrity of cluster disks.
1378

1379
    @rtype: tuple of three items
1380
    @return: a tuple of (dict of node-to-node_error, list of instances
1381
        which need activate-disks, dict of instance: (node, volume) for
1382
        missing volumes)
1383

1384
    """
1385
    result = res_nodes, res_instances, res_missing = {}, [], {}
1386

    
1387
    vg_name = self.cfg.GetVGName()
1388
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1389
    instances = [self.cfg.GetInstanceInfo(name)
1390
                 for name in self.cfg.GetInstanceList()]
1391

    
1392
    nv_dict = {}
1393
    for inst in instances:
1394
      inst_lvs = {}
1395
      if (not inst.admin_up or
1396
          inst.disk_template not in constants.DTS_NET_MIRROR):
1397
        continue
1398
      inst.MapLVsByNode(inst_lvs)
1399
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1400
      for node, vol_list in inst_lvs.iteritems():
1401
        for vol in vol_list:
1402
          nv_dict[(node, vol)] = inst
1403

    
1404
    if not nv_dict:
1405
      return result
1406

    
1407
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1408

    
1409
    for node in nodes:
1410
      # node_volume
1411
      node_res = node_lvs[node]
1412
      if node_res.offline:
1413
        continue
1414
      msg = node_res.fail_msg
1415
      if msg:
1416
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1417
        res_nodes[node] = msg
1418
        continue
1419

    
1420
      lvs = node_res.payload
1421
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1422
        inst = nv_dict.pop((node, lv_name), None)
1423
        if (not lv_online and inst is not None
1424
            and inst.name not in res_instances):
1425
          res_instances.append(inst.name)
1426

    
1427
    # any leftover items in nv_dict are missing LVs, let's arrange the
1428
    # data better
1429
    for key, inst in nv_dict.iteritems():
1430
      if inst.name not in res_missing:
1431
        res_missing[inst.name] = []
1432
      res_missing[inst.name].append(key)
1433

    
1434
    return result
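
# Editor's illustration, not part of the original file: how a caller could
# unpack and report the three-element result documented in LUVerifyDisks.Exec;
# "result" is assumed to be the tuple returned by that method and feedback_fn
# any callable accepting a message string.
def _ExampleReportVerifyDisks(result, feedback_fn):
  """Pretty-print a LUVerifyDisks result (editor's sketch only)."""
  node_errors, offline_disk_instances, missing_lvs = result
  for node, error in node_errors.items():
    feedback_fn("Error gathering data on node %s: %s" % (node, error))
  for iname in offline_disk_instances:
    feedback_fn("Instance %s needs activate-disks" % iname)
  for iname, lvs in missing_lvs.items():
    feedback_fn("Instance %s is missing volumes: %s" % (iname, lvs))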
1435

    
1436

    
1437
class LURenameCluster(LogicalUnit):
1438
  """Rename the cluster.
1439

1440
  """
1441
  HPATH = "cluster-rename"
1442
  HTYPE = constants.HTYPE_CLUSTER
1443
  _OP_REQP = ["name"]
1444

    
1445
  def BuildHooksEnv(self):
1446
    """Build hooks env.
1447

1448
    """
1449
    env = {
1450
      "OP_TARGET": self.cfg.GetClusterName(),
1451
      "NEW_NAME": self.op.name,
1452
      }
1453
    mn = self.cfg.GetMasterNode()
1454
    return env, [mn], [mn]
1455

    
1456
  def CheckPrereq(self):
1457
    """Verify that the passed name is a valid one.
1458

1459
    """
1460
    hostname = utils.HostInfo(self.op.name)
1461

    
1462
    new_name = hostname.name
1463
    self.ip = new_ip = hostname.ip
1464
    old_name = self.cfg.GetClusterName()
1465
    old_ip = self.cfg.GetMasterIP()
1466
    if new_name == old_name and new_ip == old_ip:
1467
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1468
                                 " cluster has changed")
1469
    if new_ip != old_ip:
1470
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1471
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1472
                                   " reachable on the network. Aborting." %
1473
                                   new_ip)
1474

    
1475
    self.op.name = new_name
1476

    
1477
  def Exec(self, feedback_fn):
1478
    """Rename the cluster.
1479

1480
    """
1481
    clustername = self.op.name
1482
    ip = self.ip
1483

    
1484
    # shutdown the master IP
1485
    master = self.cfg.GetMasterNode()
1486
    result = self.rpc.call_node_stop_master(master, False)
1487
    result.Raise("Could not disable the master role")
1488

    
1489
    try:
1490
      cluster = self.cfg.GetClusterInfo()
1491
      cluster.cluster_name = clustername
1492
      cluster.master_ip = ip
1493
      self.cfg.Update(cluster)
1494

    
1495
      # update the known hosts file
1496
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1497
      node_list = self.cfg.GetNodeList()
1498
      try:
1499
        node_list.remove(master)
1500
      except ValueError:
1501
        pass
1502
      result = self.rpc.call_upload_file(node_list,
1503
                                         constants.SSH_KNOWN_HOSTS_FILE)
1504
      for to_node, to_result in result.iteritems():
1505
        msg = to_result.fail_msg
1506
        if msg:
1507
          msg = ("Copy of file %s to node %s failed: %s" %
1508
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1509
          self.proc.LogWarning(msg)
1510

    
1511
    finally:
1512
      result = self.rpc.call_node_start_master(master, False, False)
1513
      msg = result.fail_msg
1514
      if msg:
1515
        self.LogWarning("Could not re-enable the master role on"
1516
                        " the master, please restart manually: %s", msg)
1517

    
1518

    
1519
def _RecursiveCheckIfLVMBased(disk):
1520
  """Check if the given disk or its children are lvm-based.
1521

1522
  @type disk: L{objects.Disk}
1523
  @param disk: the disk to check
1524
  @rtype: boolean
1525
  @return: boolean indicating whether a LD_LV dev_type was found or not
1526

1527
  """
1528
  if disk.children:
1529
    for chdisk in disk.children:
1530
      if _RecursiveCheckIfLVMBased(chdisk):
1531
        return True
1532
  return disk.dev_type == constants.LD_LV
1533
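# Illustrative sketch (editor's addition, hypothetical objects): for a DRBD8
# disk whose children are two logical volumes the recursion reaches the LV
# leaves and the check succeeds, while a disk tree without any LD_LV device
# does not:
#
#   lv_a = objects.Disk(dev_type=constants.LD_LV, children=[])
#   lv_b = objects.Disk(dev_type=constants.LD_LV, children=[])
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, children=[lv_a, lv_b])
#   _RecursiveCheckIfLVMBased(drbd)  # -> True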

    
1534

    
1535
class LUSetClusterParams(LogicalUnit):
1536
  """Change the parameters of the cluster.
1537

1538
  """
1539
  HPATH = "cluster-modify"
1540
  HTYPE = constants.HTYPE_CLUSTER
1541
  _OP_REQP = []
1542
  REQ_BGL = False
1543

    
1544
  def CheckArguments(self):
1545
    """Check parameters
1546

1547
    """
1548
    if not hasattr(self.op, "candidate_pool_size"):
1549
      self.op.candidate_pool_size = None
1550
    if self.op.candidate_pool_size is not None:
1551
      try:
1552
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1553
      except (ValueError, TypeError), err:
1554
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1555
                                   str(err))
1556
      if self.op.candidate_pool_size < 1:
1557
        raise errors.OpPrereqError("At least one master candidate needed")
1558

    
1559
  def ExpandNames(self):
1560
    # FIXME: in the future maybe other cluster params won't require checking on
1561
    # all nodes to be modified.
1562
    self.needed_locks = {
1563
      locking.LEVEL_NODE: locking.ALL_SET,
1564
    }
1565
    self.share_locks[locking.LEVEL_NODE] = 1
1566

    
1567
  def BuildHooksEnv(self):
1568
    """Build hooks env.
1569

1570
    """
1571
    env = {
1572
      "OP_TARGET": self.cfg.GetClusterName(),
1573
      "NEW_VG_NAME": self.op.vg_name,
1574
      }
1575
    mn = self.cfg.GetMasterNode()
1576
    return env, [mn], [mn]
1577

    
1578
  def CheckPrereq(self):
1579
    """Check prerequisites.
1580

1581
    This checks whether the given params don't conflict and
1582
    if the given volume group is valid.
1583

1584
    """
1585
    if self.op.vg_name is not None and not self.op.vg_name:
1586
      instances = self.cfg.GetAllInstancesInfo().values()
1587
      for inst in instances:
1588
        for disk in inst.disks:
1589
          if _RecursiveCheckIfLVMBased(disk):
1590
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1591
                                       " lvm-based instances exist")
1592

    
1593
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1594

    
1595
    # if vg_name not None, checks given volume group on all nodes
1596
    if self.op.vg_name:
1597
      vglist = self.rpc.call_vg_list(node_list)
1598
      for node in node_list:
1599
        msg = vglist[node].fail_msg
1600
        if msg:
1601
          # ignoring down node
1602
          self.LogWarning("Error while gathering data on node %s"
1603
                          " (ignoring node): %s", node, msg)
1604
          continue
1605
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1606
                                              self.op.vg_name,
1607
                                              constants.MIN_VG_SIZE)
1608
        if vgstatus:
1609
          raise errors.OpPrereqError("Error on node '%s': %s" %
1610
                                     (node, vgstatus))
1611

    
1612
    self.cluster = cluster = self.cfg.GetClusterInfo()
1613
    # validate params changes
1614
    if self.op.beparams:
1615
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1616
      self.new_beparams = objects.FillDict(
1617
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1618

    
1619
    if self.op.nicparams:
1620
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1621
      self.new_nicparams = objects.FillDict(
1622
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1623
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1624

    
1625
    # hypervisor list/parameters
1626
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1627
    if self.op.hvparams:
1628
      if not isinstance(self.op.hvparams, dict):
1629
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1630
      for hv_name, hv_dict in self.op.hvparams.items():
1631
        if hv_name not in self.new_hvparams:
1632
          self.new_hvparams[hv_name] = hv_dict
1633
        else:
1634
          self.new_hvparams[hv_name].update(hv_dict)
1635

    
1636
    if self.op.enabled_hypervisors is not None:
1637
      self.hv_list = self.op.enabled_hypervisors
1638
      if not self.hv_list:
1639
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1640
                                   " least one member")
1641
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1642
      if invalid_hvs:
1643
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1644
                                   " entries: %s" % invalid_hvs)
1645
    else:
1646
      self.hv_list = cluster.enabled_hypervisors
1647

    
1648
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1649
      # either the enabled list has changed, or the parameters have, validate
1650
      for hv_name, hv_params in self.new_hvparams.items():
1651
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1652
            (self.op.enabled_hypervisors and
1653
             hv_name in self.op.enabled_hypervisors)):
1654
          # either this is a new hypervisor, or its parameters have changed
1655
          hv_class = hypervisor.GetHypervisor(hv_name)
1656
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1657
          hv_class.CheckParameterSyntax(hv_params)
1658
          _CheckHVParams(self, node_list, hv_name, hv_params)
1659

    
1660
  def Exec(self, feedback_fn):
1661
    """Change the parameters of the cluster.
1662

1663
    """
1664
    if self.op.vg_name is not None:
1665
      new_volume = self.op.vg_name
1666
      if not new_volume:
1667
        new_volume = None
1668
      if new_volume != self.cfg.GetVGName():
1669
        self.cfg.SetVGName(new_volume)
1670
      else:
1671
        feedback_fn("Cluster LVM configuration already in desired"
1672
                    " state, not changing")
1673
    if self.op.hvparams:
1674
      self.cluster.hvparams = self.new_hvparams
1675
    if self.op.enabled_hypervisors is not None:
1676
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1677
    if self.op.beparams:
1678
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1679
    if self.op.nicparams:
1680
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1681

    
1682
    if self.op.candidate_pool_size is not None:
1683
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1684
      # we need to update the pool size here, otherwise the save will fail
1685
      _AdjustCandidatePool(self)
1686

    
1687
    self.cfg.Update(self.cluster)
1688

    
1689

    
1690
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1691
  """Distribute additional files which are part of the cluster configuration.
1692

1693
  ConfigWriter takes care of distributing the config and ssconf files, but
1694
  there are more files which should be distributed to all nodes. This function
1695
  makes sure those are copied.
1696

1697
  @param lu: calling logical unit
1698
  @param additional_nodes: list of nodes not in the config to distribute to
1699

1700
  """
1701
  # 1. Gather target nodes
1702
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1703
  dist_nodes = lu.cfg.GetNodeList()
1704
  if additional_nodes is not None:
1705
    dist_nodes.extend(additional_nodes)
1706
  if myself.name in dist_nodes:
1707
    dist_nodes.remove(myself.name)
1708
  # 2. Gather files to distribute
1709
  dist_files = set([constants.ETC_HOSTS,
1710
                    constants.SSH_KNOWN_HOSTS_FILE,
1711
                    constants.RAPI_CERT_FILE,
1712
                    constants.RAPI_USERS_FILE,
1713
                    constants.HMAC_CLUSTER_KEY,
1714
                   ])
1715

    
1716
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1717
  for hv_name in enabled_hypervisors:
1718
    hv_class = hypervisor.GetHypervisor(hv_name)
1719
    dist_files.update(hv_class.GetAncillaryFiles())
1720

    
1721
  # 3. Perform the files upload
1722
  for fname in dist_files:
1723
    if os.path.exists(fname):
1724
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1725
      for to_node, to_result in result.items():
1726
        msg = to_result.fail_msg
1727
        if msg:
1728
          msg = ("Copy of file %s to node %s failed: %s" %
1729
                 (fname, to_node, msg))
1730
          lu.proc.LogWarning(msg)
1731
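# Usage note (editor's addition): within this module the helper is invoked in
# two ways -- LURedistributeConfig copies the files to all configured nodes,
# while LUAddNode also includes the not-yet-configured node via
# additional_nodes:
#
#   _RedistributeAncillaryFiles(self)                           # all nodes
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])  # plus new node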

    
1732

    
1733
class LURedistributeConfig(NoHooksLU):
1734
  """Force the redistribution of cluster configuration.
1735

1736
  This is a very simple LU.
1737

1738
  """
1739
  _OP_REQP = []
1740
  REQ_BGL = False
1741

    
1742
  def ExpandNames(self):
1743
    self.needed_locks = {
1744
      locking.LEVEL_NODE: locking.ALL_SET,
1745
    }
1746
    self.share_locks[locking.LEVEL_NODE] = 1
1747

    
1748
  def CheckPrereq(self):
1749
    """Check prerequisites.
1750

1751
    """
1752

    
1753
  def Exec(self, feedback_fn):
1754
    """Redistribute the configuration.
1755

1756
    """
1757
    self.cfg.Update(self.cfg.GetClusterInfo())
1758
    _RedistributeAncillaryFiles(self)
1759

    
1760

    
1761
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1762
  """Sleep and poll for an instance's disk to sync.
1763

1764
  """
1765
  if not instance.disks:
1766
    return True
1767

    
1768
  if not oneshot:
1769
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1770

    
1771
  node = instance.primary_node
1772

    
1773
  for dev in instance.disks:
1774
    lu.cfg.SetDiskID(dev, node)
1775

    
1776
  retries = 0
1777
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1778
  while True:
1779
    max_time = 0
1780
    done = True
1781
    cumul_degraded = False
1782
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1783
    msg = rstats.fail_msg
1784
    if msg:
1785
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1786
      retries += 1
1787
      if retries >= 10:
1788
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1789
                                 " aborting." % node)
1790
      time.sleep(6)
1791
      continue
1792
    rstats = rstats.payload
1793
    retries = 0
1794
    for i, mstat in enumerate(rstats):
1795
      if mstat is None:
1796
        lu.LogWarning("Can't compute data for node %s/%s",
1797
                           node, instance.disks[i].iv_name)
1798
        continue
1799
      # we ignore the ldisk parameter
1800
      perc_done, est_time, is_degraded, _ = mstat
1801
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1802
      if perc_done is not None:
1803
        done = False
1804
        if est_time is not None:
1805
          rem_time = "%d estimated seconds remaining" % est_time
1806
          max_time = est_time
1807
        else:
1808
          rem_time = "no time estimate"
1809
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1810
                        (instance.disks[i].iv_name, perc_done, rem_time))
1811

    
1812
    # if we're done but degraded, let's do a few small retries, to
1813
    # make sure we see a stable and not transient situation; therefore
1814
    # we force restart of the loop
1815
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1816
      logging.info("Degraded disks found, %d retries left", degr_retries)
1817
      degr_retries -= 1
1818
      time.sleep(1)
1819
      continue
1820

    
1821
    if done or oneshot:
1822
      break
1823

    
1824
    time.sleep(min(60, max_time))
1825

    
1826
  if done:
1827
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1828
  return not cumul_degraded
1829
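# Timing note (editor's addition, derived from the loop above): RPC failures
# are retried up to 10 times with a 6 second pause, a "done but degraded"
# state is re-checked up to degr_retries (10) times at 1 second intervals,
# and otherwise the loop sleeps min(60, max_time) seconds between polls,
# where max_time is the largest per-device time estimate seen in the last
# status round.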

    
1830

    
1831
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1832
  """Check that mirrors are not degraded.
1833

1834
  The ldisk parameter, if True, will change the test from the
1835
  is_degraded attribute (which represents overall non-ok status for
1836
  the device(s)) to the ldisk (representing the local storage status).
1837

1838
  """
1839
  lu.cfg.SetDiskID(dev, node)
1840
  if ldisk:
1841
    idx = 6
1842
  else:
1843
    idx = 5
1844

    
1845
  result = True
1846
  if on_primary or dev.AssembleOnSecondary():
1847
    rstats = lu.rpc.call_blockdev_find(node, dev)
1848
    msg = rstats.fail_msg
1849
    if msg:
1850
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1851
      result = False
1852
    elif not rstats.payload:
1853
      lu.LogWarning("Can't find disk on node %s", node)
1854
      result = False
1855
    else:
1856
      result = result and (not rstats.payload[idx])
1857
  if dev.children:
1858
    for child in dev.children:
1859
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1860

    
1861
  return result
1862
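# Usage note (editor's addition): callers choose between the two status
# fields of the blockdev_find payload via the ldisk flag -- index 5
# (overall is_degraded) by default, index 6 (local disk status) when
# ldisk=True.  A hedged sketch of a typical call:
#
#   ok = _CheckDiskConsistency(self, dev, node, on_primary=False, ldisk=True)
#
# Note that the recursion over dev.children always re-checks with the
# default (is_degraded) field, since ldisk is not propagated.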

    
1863

    
1864
class LUDiagnoseOS(NoHooksLU):
1865
  """Logical unit for OS diagnose/query.
1866

1867
  """
1868
  _OP_REQP = ["output_fields", "names"]
1869
  REQ_BGL = False
1870
  _FIELDS_STATIC = utils.FieldSet()
1871
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1872

    
1873
  def ExpandNames(self):
1874
    if self.op.names:
1875
      raise errors.OpPrereqError("Selective OS query not supported")
1876

    
1877
    _CheckOutputFields(static=self._FIELDS_STATIC,
1878
                       dynamic=self._FIELDS_DYNAMIC,
1879
                       selected=self.op.output_fields)
1880

    
1881
    # Lock all nodes, in shared mode
1882
    # Temporary removal of locks, should be reverted later
1883
    # TODO: reintroduce locks when they are lighter-weight
1884
    self.needed_locks = {}
1885
    #self.share_locks[locking.LEVEL_NODE] = 1
1886
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1887

    
1888
  def CheckPrereq(self):
1889
    """Check prerequisites.
1890

1891
    """
1892

    
1893
  @staticmethod
1894
  def _DiagnoseByOS(node_list, rlist):
1895
    """Remaps a per-node return list into an a per-os per-node dictionary
1896

1897
    @param node_list: a list with the names of all nodes
1898
    @param rlist: a map with node names as keys and OS objects as values
1899

1900
    @rtype: dict
1901
    @return: a dictionary with osnames as keys and as value another map, with
1902
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
1903

1904
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
1905
                                     (/srv/..., False, "invalid api")],
1906
                           "node2": [(/srv/..., True, "")]}
1907
          }
1908

1909
    """
1910
    all_os = {}
1911
    # we build here the list of nodes that didn't fail the RPC (at RPC
1912
    # level), so that nodes with a non-responding node daemon don't
1913
    # make all OSes invalid
1914
    good_nodes = [node_name for node_name in rlist
1915
                  if not rlist[node_name].fail_msg]
1916
    for node_name, nr in rlist.items():
1917
      if nr.fail_msg or not nr.payload:
1918
        continue
1919
      for name, path, status, diagnose in nr.payload:
1920
        if name not in all_os:
1921
          # build a list of nodes for this os containing empty lists
1922
          # for each node in node_list
1923
          all_os[name] = {}
1924
          for nname in good_nodes:
1925
            all_os[name][nname] = []
1926
        all_os[name][node_name].append((path, status, diagnose))
1927
    return all_os
1928

    
1929
  def Exec(self, feedback_fn):
1930
    """Compute the list of OSes.
1931

1932
    """
1933
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1934
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1935
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1936
    output = []
1937
    for os_name, os_data in pol.items():
1938
      row = []
1939
      for field in self.op.output_fields:
1940
        if field == "name":
1941
          val = os_name
1942
        elif field == "valid":
1943
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
1944
        elif field == "node_status":
1945
          # this is just a copy of the dict
1946
          val = {}
1947
          for node_name, nos_list in os_data.items():
1948
            val[node_name] = nos_list
1949
        else:
1950
          raise errors.ParameterError(field)
1951
        row.append(val)
1952
      output.append(row)
1953

    
1954
    return output
1955

    
1956

    
1957
class LURemoveNode(LogicalUnit):
1958
  """Logical unit for removing a node.
1959

1960
  """
1961
  HPATH = "node-remove"
1962
  HTYPE = constants.HTYPE_NODE
1963
  _OP_REQP = ["node_name"]
1964

    
1965
  def BuildHooksEnv(self):
1966
    """Build hooks env.
1967

1968
    This doesn't run on the target node in the pre phase as a failed
1969
    node would then be impossible to remove.
1970

1971
    """
1972
    env = {
1973
      "OP_TARGET": self.op.node_name,
1974
      "NODE_NAME": self.op.node_name,
1975
      }
1976
    all_nodes = self.cfg.GetNodeList()
1977
    all_nodes.remove(self.op.node_name)
1978
    return env, all_nodes, all_nodes
1979

    
1980
  def CheckPrereq(self):
1981
    """Check prerequisites.
1982

1983
    This checks:
1984
     - the node exists in the configuration
1985
     - it does not have primary or secondary instances
1986
     - it's not the master
1987

1988
    Any errors are signaled by raising errors.OpPrereqError.
1989

1990
    """
1991
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1992
    if node is None:
1993
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1994

    
1995
    instance_list = self.cfg.GetInstanceList()
1996

    
1997
    masternode = self.cfg.GetMasterNode()
1998
    if node.name == masternode:
1999
      raise errors.OpPrereqError("Node is the master node,"
2000
                                 " you need to failover first.")
2001

    
2002
    for instance_name in instance_list:
2003
      instance = self.cfg.GetInstanceInfo(instance_name)
2004
      if node.name in instance.all_nodes:
2005
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2006
                                   " please remove first." % instance_name)
2007
    self.op.node_name = node.name
2008
    self.node = node
2009

    
2010
  def Exec(self, feedback_fn):
2011
    """Removes the node from the cluster.
2012

2013
    """
2014
    node = self.node
2015
    logging.info("Stopping the node daemon and removing configs from node %s",
2016
                 node.name)
2017

    
2018
    self.context.RemoveNode(node.name)
2019

    
2020
    result = self.rpc.call_node_leave_cluster(node.name)
2021
    msg = result.fail_msg
2022
    if msg:
2023
      self.LogWarning("Errors encountered on the remote node while leaving"
2024
                      " the cluster: %s", msg)
2025

    
2026
    # Promote nodes to master candidate as needed
2027
    _AdjustCandidatePool(self)
2028

    
2029

    
2030
class LUQueryNodes(NoHooksLU):
2031
  """Logical unit for querying nodes.
2032

2033
  """
2034
  _OP_REQP = ["output_fields", "names", "use_locking"]
2035
  REQ_BGL = False
2036
  _FIELDS_DYNAMIC = utils.FieldSet(
2037
    "dtotal", "dfree",
2038
    "mtotal", "mnode", "mfree",
2039
    "bootid",
2040
    "ctotal", "cnodes", "csockets",
2041
    )
2042

    
2043
  _FIELDS_STATIC = utils.FieldSet(
2044
    "name", "pinst_cnt", "sinst_cnt",
2045
    "pinst_list", "sinst_list",
2046
    "pip", "sip", "tags",
2047
    "serial_no",
2048
    "master_candidate",
2049
    "master",
2050
    "offline",
2051
    "drained",
2052
    "role",
2053
    )
2054

    
2055
  def ExpandNames(self):
2056
    _CheckOutputFields(static=self._FIELDS_STATIC,
2057
                       dynamic=self._FIELDS_DYNAMIC,
2058
                       selected=self.op.output_fields)
2059

    
2060
    self.needed_locks = {}
2061
    self.share_locks[locking.LEVEL_NODE] = 1
2062

    
2063
    if self.op.names:
2064
      self.wanted = _GetWantedNodes(self, self.op.names)
2065
    else:
2066
      self.wanted = locking.ALL_SET
2067

    
2068
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2069
    self.do_locking = self.do_node_query and self.op.use_locking
2070
    if self.do_locking:
2071
      # if we don't request only static fields, we need to lock the nodes
2072
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2073

    
2074

    
2075
  def CheckPrereq(self):
2076
    """Check prerequisites.
2077

2078
    """
2079
    # The validation of the node list is done in _GetWantedNodes, if the
2080
    # list is non-empty; if it's empty, there's no validation to do
2081
    pass
2082

    
2083
  def Exec(self, feedback_fn):
2084
    """Computes the list of nodes and their attributes.
2085

2086
    """
2087
    all_info = self.cfg.GetAllNodesInfo()
2088
    if self.do_locking:
2089
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2090
    elif self.wanted != locking.ALL_SET:
2091
      nodenames = self.wanted
2092
      missing = set(nodenames).difference(all_info.keys())
2093
      if missing:
2094
        raise errors.OpExecError(
2095
          "Some nodes were removed before retrieving their data: %s" % missing)
2096
    else:
2097
      nodenames = all_info.keys()
2098

    
2099
    nodenames = utils.NiceSort(nodenames)
2100
    nodelist = [all_info[name] for name in nodenames]
2101

    
2102
    # begin data gathering
2103

    
2104
    if self.do_node_query:
2105
      live_data = {}
2106
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2107
                                          self.cfg.GetHypervisorType())
2108
      for name in nodenames:
2109
        nodeinfo = node_data[name]
2110
        if not nodeinfo.fail_msg and nodeinfo.payload:
2111
          nodeinfo = nodeinfo.payload
2112
          fn = utils.TryConvert
2113
          live_data[name] = {
2114
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2115
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2116
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2117
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2118
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2119
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2120
            "bootid": nodeinfo.get('bootid', None),
2121
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2122
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2123
            }
2124
        else:
2125
          live_data[name] = {}
2126
    else:
2127
      live_data = dict.fromkeys(nodenames, {})
2128

    
2129
    node_to_primary = dict([(name, set()) for name in nodenames])
2130
    node_to_secondary = dict([(name, set()) for name in nodenames])
2131

    
2132
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2133
                             "sinst_cnt", "sinst_list"))
2134
    if inst_fields & frozenset(self.op.output_fields):
2135
      instancelist = self.cfg.GetInstanceList()
2136

    
2137
      for instance_name in instancelist:
2138
        inst = self.cfg.GetInstanceInfo(instance_name)
2139
        if inst.primary_node in node_to_primary:
2140
          node_to_primary[inst.primary_node].add(inst.name)
2141
        for secnode in inst.secondary_nodes:
2142
          if secnode in node_to_secondary:
2143
            node_to_secondary[secnode].add(inst.name)
2144

    
2145
    master_node = self.cfg.GetMasterNode()
2146

    
2147
    # end data gathering
2148

    
2149
    output = []
2150
    for node in nodelist:
2151
      node_output = []
2152
      for field in self.op.output_fields:
2153
        if field == "name":
2154
          val = node.name
2155
        elif field == "pinst_list":
2156
          val = list(node_to_primary[node.name])
2157
        elif field == "sinst_list":
2158
          val = list(node_to_secondary[node.name])
2159
        elif field == "pinst_cnt":
2160
          val = len(node_to_primary[node.name])
2161
        elif field == "sinst_cnt":
2162
          val = len(node_to_secondary[node.name])
2163
        elif field == "pip":
2164
          val = node.primary_ip
2165
        elif field == "sip":
2166
          val = node.secondary_ip
2167
        elif field == "tags":
2168
          val = list(node.GetTags())
2169
        elif field == "serial_no":
2170
          val = node.serial_no
2171
        elif field == "master_candidate":
2172
          val = node.master_candidate
2173
        elif field == "master":
2174
          val = node.name == master_node
2175
        elif field == "offline":
2176
          val = node.offline
2177
        elif field == "drained":
2178
          val = node.drained
2179
        elif self._FIELDS_DYNAMIC.Matches(field):
2180
          val = live_data[node.name].get(field, None)
2181
        elif field == "role":
2182
          if node.name == master_node:
2183
            val = "M"
2184
          elif node.master_candidate:
2185
            val = "C"
2186
          elif node.drained:
2187
            val = "D"
2188
          elif node.offline:
2189
            val = "O"
2190
          else:
2191
            val = "R"
2192
        else:
2193
          raise errors.ParameterError(field)
2194
        node_output.append(val)
2195
      output.append(node_output)
2196

    
2197
    return output
2198

    
2199

    
2200
class LUQueryNodeVolumes(NoHooksLU):
2201
  """Logical unit for getting volumes on node(s).
2202

2203
  """
2204
  _OP_REQP = ["nodes", "output_fields"]
2205
  REQ_BGL = False
2206
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2207
  _FIELDS_STATIC = utils.FieldSet("node")
2208

    
2209
  def ExpandNames(self):
2210
    _CheckOutputFields(static=self._FIELDS_STATIC,
2211
                       dynamic=self._FIELDS_DYNAMIC,
2212
                       selected=self.op.output_fields)
2213

    
2214
    self.needed_locks = {}
2215
    self.share_locks[locking.LEVEL_NODE] = 1
2216
    if not self.op.nodes:
2217
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2218
    else:
2219
      self.needed_locks[locking.LEVEL_NODE] = \
2220
        _GetWantedNodes(self, self.op.nodes)
2221

    
2222
  def CheckPrereq(self):
2223
    """Check prerequisites.
2224

2225
    This checks that the fields required are valid output fields.
2226

2227
    """
2228
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2229

    
2230
  def Exec(self, feedback_fn):
2231
    """Computes the list of nodes and their attributes.
2232

2233
    """
2234
    nodenames = self.nodes
2235
    volumes = self.rpc.call_node_volumes(nodenames)
2236

    
2237
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2238
             in self.cfg.GetInstanceList()]
2239

    
2240
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2241

    
2242
    output = []
2243
    for node in nodenames:
2244
      nresult = volumes[node]
2245
      if nresult.offline:
2246
        continue
2247
      msg = nresult.fail_msg
2248
      if msg:
2249
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2250
        continue
2251

    
2252
      node_vols = nresult.payload[:]
2253
      node_vols.sort(key=lambda vol: vol['dev'])
2254

    
2255
      for vol in node_vols:
2256
        node_output = []
2257
        for field in self.op.output_fields:
2258
          if field == "node":
2259
            val = node
2260
          elif field == "phys":
2261
            val = vol['dev']
2262
          elif field == "vg":
2263
            val = vol['vg']
2264
          elif field == "name":
2265
            val = vol['name']
2266
          elif field == "size":
2267
            val = int(float(vol['size']))
2268
          elif field == "instance":
2269
            for inst in ilist:
2270
              if node not in lv_by_node[inst]:
2271
                continue
2272
              if vol['name'] in lv_by_node[inst][node]:
2273
                val = inst.name
2274
                break
2275
            else:
2276
              val = '-'
2277
          else:
2278
            raise errors.ParameterError(field)
2279
          node_output.append(str(val))
2280

    
2281
        output.append(node_output)
2282

    
2283
    return output
2284

    
2285

    
2286
class LUAddNode(LogicalUnit):
2287
  """Logical unit for adding node to the cluster.
2288

2289
  """
2290
  HPATH = "node-add"
2291
  HTYPE = constants.HTYPE_NODE
2292
  _OP_REQP = ["node_name"]
2293

    
2294
  def BuildHooksEnv(self):
2295
    """Build hooks env.
2296

2297
    This will run on all nodes before, and on all nodes + the new node after.
2298

2299
    """
2300
    env = {
2301
      "OP_TARGET": self.op.node_name,
2302
      "NODE_NAME": self.op.node_name,
2303
      "NODE_PIP": self.op.primary_ip,
2304
      "NODE_SIP": self.op.secondary_ip,
2305
      }
2306
    nodes_0 = self.cfg.GetNodeList()
2307
    nodes_1 = nodes_0 + [self.op.node_name, ]
2308
    return env, nodes_0, nodes_1
2309

    
2310
  def CheckPrereq(self):
2311
    """Check prerequisites.
2312

2313
    This checks:
2314
     - the new node is not already in the config
2315
     - it is resolvable
2316
     - its parameters (single/dual homed) match the cluster
2317

2318
    Any errors are signaled by raising errors.OpPrereqError.
2319

2320
    """
2321
    node_name = self.op.node_name
2322
    cfg = self.cfg
2323

    
2324
    dns_data = utils.HostInfo(node_name)
2325

    
2326
    node = dns_data.name
2327
    primary_ip = self.op.primary_ip = dns_data.ip
2328
    secondary_ip = getattr(self.op, "secondary_ip", None)
2329
    if secondary_ip is None:
2330
      secondary_ip = primary_ip
2331
    if not utils.IsValidIP(secondary_ip):
2332
      raise errors.OpPrereqError("Invalid secondary IP given")
2333
    self.op.secondary_ip = secondary_ip
2334

    
2335
    node_list = cfg.GetNodeList()
2336
    if not self.op.readd and node in node_list:
2337
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2338
                                 node)
2339
    elif self.op.readd and node not in node_list:
2340
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2341

    
2342
    for existing_node_name in node_list:
2343
      existing_node = cfg.GetNodeInfo(existing_node_name)
2344

    
2345
      if self.op.readd and node == existing_node_name:
2346
        if (existing_node.primary_ip != primary_ip or
2347
            existing_node.secondary_ip != secondary_ip):
2348
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2349
                                     " address configuration as before")
2350
        continue
2351

    
2352
      if (existing_node.primary_ip == primary_ip or
2353
          existing_node.secondary_ip == primary_ip or
2354
          existing_node.primary_ip == secondary_ip or
2355
          existing_node.secondary_ip == secondary_ip):
2356
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2357
                                   " existing node %s" % existing_node.name)
2358

    
2359
    # check that the type of the node (single versus dual homed) is the
2360
    # same as for the master
2361
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2362
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2363
    newbie_singlehomed = secondary_ip == primary_ip
2364
    if master_singlehomed != newbie_singlehomed:
2365
      if master_singlehomed:
2366
        raise errors.OpPrereqError("The master has no private ip but the"
2367
                                   " new node has one")
2368
      else:
2369
        raise errors.OpPrereqError("The master has a private ip but the"
2370
                                   " new node doesn't have one")
2371

    
2372
    # checks reachability
2373
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2374
      raise errors.OpPrereqError("Node not reachable by ping")
2375

    
2376
    if not newbie_singlehomed:
2377
      # check reachability from my secondary ip to newbie's secondary ip
2378
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2379
                           source=myself.secondary_ip):
2380
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2381
                                   " based ping to noded port")
2382

    
2383
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2384
    if self.op.readd:
2385
      exceptions = [node]
2386
    else:
2387
      exceptions = []
2388
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2389
    # the new node will increase mc_max with one, so:
2390
    mc_max = min(mc_max + 1, cp_size)
2391
    self.master_candidate = mc_now < mc_max
2392

    
2393
    if self.op.readd:
2394
      self.new_node = self.cfg.GetNodeInfo(node)
2395
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2396
    else:
2397
      self.new_node = objects.Node(name=node,
2398
                                   primary_ip=primary_ip,
2399
                                   secondary_ip=secondary_ip,
2400
                                   master_candidate=self.master_candidate,
2401
                                   offline=False, drained=False)
2402

    
2403
  def Exec(self, feedback_fn):
2404
    """Adds the new node to the cluster.
2405

2406
    """
2407
    new_node = self.new_node
2408
    node = new_node.name
2409

    
2410
    # for re-adds, reset the offline/drained/master-candidate flags;
2411
    # we need to reset here, otherwise offline would prevent RPC calls
2412
    # later in the procedure; this also means that if the re-add
2413
    # fails, we are left with a non-offlined, broken node
2414
    if self.op.readd:
2415
      new_node.drained = new_node.offline = False
2416
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2417
      # if we demote the node, we do cleanup later in the procedure
2418
      new_node.master_candidate = self.master_candidate
2419

    
2420
    # notify the user about any possible mc promotion
2421
    if new_node.master_candidate:
2422
      self.LogInfo("Node will be a master candidate")
2423

    
2424
    # check connectivity
2425
    result = self.rpc.call_version([node])[node]
2426
    result.Raise("Can't get version information from node %s" % node)
2427
    if constants.PROTOCOL_VERSION == result.payload:
2428
      logging.info("Communication to node %s fine, sw version %s match",
2429
                   node, result.payload)
2430
    else:
2431
      raise errors.OpExecError("Version mismatch master version %s,"
2432
                               " node version %s" %
2433
                               (constants.PROTOCOL_VERSION, result.payload))
2434

    
2435
    # setup ssh on node
2436
    logging.info("Copy ssh key to node %s", node)
2437
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2438
    keyarray = []
2439
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2440
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2441
                priv_key, pub_key]
2442

    
2443
    for i in keyfiles:
2444
      f = open(i, 'r')
2445
      try:
2446
        keyarray.append(f.read())
2447
      finally:
2448
        f.close()
2449

    
2450
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2451
                                    keyarray[2],
2452
                                    keyarray[3], keyarray[4], keyarray[5])
2453
    result.Raise("Cannot transfer ssh keys to the new node")
2454

    
2455
    # Add node to our /etc/hosts, and add key to known_hosts
2456
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2457
      utils.AddHostToEtcHosts(new_node.name)
2458

    
2459
    if new_node.secondary_ip != new_node.primary_ip:
2460
      result = self.rpc.call_node_has_ip_address(new_node.name,
2461
                                                 new_node.secondary_ip)
2462
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2463
                   prereq=True)
2464
      if not result.payload:
2465
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2466
                                 " you gave (%s). Please fix and re-run this"
2467
                                 " command." % new_node.secondary_ip)
2468

    
2469
    node_verify_list = [self.cfg.GetMasterNode()]
2470
    node_verify_param = {
2471
      'nodelist': [node],
2472
      # TODO: do a node-net-test as well?
2473
    }
2474

    
2475
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2476
                                       self.cfg.GetClusterName())
2477
    for verifier in node_verify_list:
2478
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2479
      nl_payload = result[verifier].payload['nodelist']
2480
      if nl_payload:
2481
        for failed in nl_payload:
2482
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2483
                      (verifier, nl_payload[failed]))
2484
        raise errors.OpExecError("ssh/hostname verification failed.")
2485

    
2486
    if self.op.readd:
2487
      _RedistributeAncillaryFiles(self)
2488
      self.context.ReaddNode(new_node)
2489
      # make sure we redistribute the config
2490
      self.cfg.Update(new_node)
2491
      # and make sure the new node will not have old files around
2492
      if not new_node.master_candidate:
2493
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2494
        msg = result.fail_msg
2495
        if msg:
2496
          self.LogWarning("Node failed to demote itself from master"
2497
                          " candidate status: %s" % msg)
2498
    else:
2499
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2500
      self.context.AddNode(new_node)
2501

    
2502

    
2503
class LUSetNodeParams(LogicalUnit):
2504
  """Modifies the parameters of a node.
2505

2506
  """
2507
  HPATH = "node-modify"
2508
  HTYPE = constants.HTYPE_NODE
2509
  _OP_REQP = ["node_name"]
2510
  REQ_BGL = False
2511

    
2512
  def CheckArguments(self):
2513
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2514
    if node_name is None:
2515
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2516
    self.op.node_name = node_name
2517
    _CheckBooleanOpField(self.op, 'master_candidate')
2518
    _CheckBooleanOpField(self.op, 'offline')
2519
    _CheckBooleanOpField(self.op, 'drained')
2520
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2521
    if all_mods.count(None) == 3:
2522
      raise errors.OpPrereqError("Please pass at least one modification")
2523
    if all_mods.count(True) > 1:
2524
      raise errors.OpPrereqError("Can't set the node into more than one"
2525
                                 " state at the same time")
2526

    
2527
  def ExpandNames(self):
2528
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2529

    
2530
  def BuildHooksEnv(self):
2531
    """Build hooks env.
2532

2533
    This runs on the master node.
2534

2535
    """
2536
    env = {
2537
      "OP_TARGET": self.op.node_name,
2538
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2539
      "OFFLINE": str(self.op.offline),
2540
      "DRAINED": str(self.op.drained),
2541
      }
2542
    nl = [self.cfg.GetMasterNode(),
2543
          self.op.node_name]
2544
    return env, nl, nl
2545

    
2546
  def CheckPrereq(self):
2547
    """Check prerequisites.
2548

2549
    This only checks the instance list against the existing names.
2550

2551
    """
2552
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2553

    
2554
    if ((self.op.master_candidate == False or self.op.offline == True or
2555
         self.op.drained == True) and node.master_candidate):
2556
      # we will demote the node from master_candidate
2557
      if self.op.node_name == self.cfg.GetMasterNode():
2558
        raise errors.OpPrereqError("The master node has to be a"
2559
                                   " master candidate, online and not drained")
2560
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2561
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2562
      if num_candidates <= cp_size:
2563
        msg = ("Not enough master candidates (desired"
2564
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2565
        if self.op.force:
2566
          self.LogWarning(msg)
2567
        else:
2568
          raise errors.OpPrereqError(msg)
2569

    
2570
    if (self.op.master_candidate == True and
2571
        ((node.offline and not self.op.offline == False) or
2572
         (node.drained and not self.op.drained == False))):
2573
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2574
                                 " to master_candidate" % node.name)
2575

    
2576
    return
2577

    
2578
  def Exec(self, feedback_fn):
2579
    """Modifies a node.
2580

2581
    """
2582
    node = self.node
2583

    
2584
    result = []
2585
    changed_mc = False
2586

    
2587
    if self.op.offline is not None:
2588
      node.offline = self.op.offline
2589
      result.append(("offline", str(self.op.offline)))
2590
      if self.op.offline == True:
2591
        if node.master_candidate:
2592
          node.master_candidate = False
2593
          changed_mc = True
2594
          result.append(("master_candidate", "auto-demotion due to offline"))
2595
        if node.drained:
2596
          node.drained = False
2597
          result.append(("drained", "clear drained status due to offline"))
2598

    
2599
    if self.op.master_candidate is not None:
2600
      node.master_candidate = self.op.master_candidate
2601
      changed_mc = True
2602
      result.append(("master_candidate", str(self.op.master_candidate)))
2603
      if self.op.master_candidate == False:
2604
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2605
        msg = rrc.fail_msg
2606
        if msg:
2607
          self.LogWarning("Node failed to demote itself: %s" % msg)
2608

    
2609
    if self.op.drained is not None:
2610
      node.drained = self.op.drained
2611
      result.append(("drained", str(self.op.drained)))
2612
      if self.op.drained == True:
2613
        if node.master_candidate:
2614
          node.master_candidate = False
2615
          changed_mc = True
2616
          result.append(("master_candidate", "auto-demotion due to drain"))
2617
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2618
          msg = rrc.fail_msg
2619
          if msg:
2620
            self.LogWarning("Node failed to demote itself: %s" % msg)
2621
        if node.offline:
2622
          node.offline = False
2623
          result.append(("offline", "clear offline status due to drain"))
2624

    
2625
    # this will trigger configuration file update, if needed
2626
    self.cfg.Update(node)
2627
    # this will trigger job queue propagation or cleanup
2628
    if changed_mc:
2629
      self.context.ReaddNode(node)
2630

    
2631
    return result
2632

    
2633

    
2634
class LUPowercycleNode(NoHooksLU):
2635
  """Powercycles a node.
2636

2637
  """
2638
  _OP_REQP = ["node_name", "force"]
2639
  REQ_BGL = False
2640

    
2641
  def CheckArguments(self):
2642
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2643
    if node_name is None:
2644
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2645
    self.op.node_name = node_name
2646
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2647
      raise errors.OpPrereqError("The node is the master and the force"
2648
                                 " parameter was not set")
2649

    
2650
  def ExpandNames(self):
2651
    """Locking for PowercycleNode.
2652

2653
    This is a last-resort option and shouldn't block on other
2654
    jobs. Therefore, we grab no locks.
2655

2656
    """
2657
    self.needed_locks = {}
2658

    
2659
  def CheckPrereq(self):
2660
    """Check prerequisites.
2661

2662
    This LU has no prereqs.
2663

2664
    """
2665
    pass
2666

    
2667
  def Exec(self, feedback_fn):
2668
    """Reboots a node.
2669

2670
    """
2671
    result = self.rpc.call_node_powercycle(self.op.node_name,
2672
                                           self.cfg.GetHypervisorType())
2673
    result.Raise("Failed to schedule the reboot")
2674
    return result.payload
2675

    
2676

    
2677
class LUQueryClusterInfo(NoHooksLU):
2678
  """Query cluster configuration.
2679

2680
  """
2681
  _OP_REQP = []
2682
  REQ_BGL = False
2683

    
2684
  def ExpandNames(self):
2685
    self.needed_locks = {}
2686

    
2687
  def CheckPrereq(self):
2688
    """No prerequsites needed for this LU.
2689

2690
    """
2691
    pass
2692

    
2693
  def Exec(self, feedback_fn):
2694
    """Return cluster config.
2695

2696
    """
2697
    cluster = self.cfg.GetClusterInfo()
2698
    result = {
2699
      "software_version": constants.RELEASE_VERSION,
2700
      "protocol_version": constants.PROTOCOL_VERSION,
2701
      "config_version": constants.CONFIG_VERSION,
2702
      "os_api_version": max(constants.OS_API_VERSIONS),
2703
      "export_version": constants.EXPORT_VERSION,
2704
      "architecture": (platform.architecture()[0], platform.machine()),
2705
      "name": cluster.cluster_name,
2706
      "master": cluster.master_node,
2707
      "default_hypervisor": cluster.enabled_hypervisors[0],
2708
      "enabled_hypervisors": cluster.enabled_hypervisors,
2709
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
2710
                        for hypervisor_name in cluster.enabled_hypervisors]),
2711
      "beparams": cluster.beparams,
2712
      "nicparams": cluster.nicparams,
2713
      "candidate_pool_size": cluster.candidate_pool_size,
2714
      "master_netdev": cluster.master_netdev,
2715
      "volume_group_name": cluster.volume_group_name,
2716
      "file_storage_dir": cluster.file_storage_dir,
2717
      }
2718

    
2719
    return result
2720

    
2721

    
2722
class LUQueryConfigValues(NoHooksLU):
2723
  """Return configuration values.
2724

2725
  """
2726
  _OP_REQP = []
2727
  REQ_BGL = False
2728
  _FIELDS_DYNAMIC = utils.FieldSet()
2729
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2730

    
2731
  def ExpandNames(self):
2732
    self.needed_locks = {}
2733

    
2734
    _CheckOutputFields(static=self._FIELDS_STATIC,
2735
                       dynamic=self._FIELDS_DYNAMIC,
2736
                       selected=self.op.output_fields)
2737

    
2738
  def CheckPrereq(self):
2739
    """No prerequisites.
2740

2741
    """
2742
    pass
2743

    
2744
  def Exec(self, feedback_fn):
2745
    """Dump a representation of the cluster config to the standard output.
2746

2747
    """
2748
    values = []
2749
    for field in self.op.output_fields:
2750
      if field == "cluster_name":
2751
        entry = self.cfg.GetClusterName()
2752
      elif field == "master_node":
2753
        entry = self.cfg.GetMasterNode()
2754
      elif field == "drain_flag":
2755
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2756
      else:
2757
        raise errors.ParameterError(field)
2758
      values.append(entry)
2759
    return values
2760

    
2761

    
2762
class LUActivateInstanceDisks(NoHooksLU):
2763
  """Bring up an instance's disks.
2764

2765
  """
2766
  _OP_REQP = ["instance_name"]
2767
  REQ_BGL = False
2768

    
2769
  def ExpandNames(self):
2770
    self._ExpandAndLockInstance()
2771
    self.needed_locks[locking.LEVEL_NODE] = []
2772
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2773

    
2774
  def DeclareLocks(self, level):
2775
    if level == locking.LEVEL_NODE:
2776
      self._LockInstancesNodes()
2777

    
2778
  def CheckPrereq(self):
2779
    """Check prerequisites.
2780

2781
    This checks that the instance is in the cluster.
2782

2783
    """
2784
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2785
    assert self.instance is not None, \
2786
      "Cannot retrieve locked instance %s" % self.op.instance_name
2787
    _CheckNodeOnline(self, self.instance.primary_node)
2788

    
2789
  def Exec(self, feedback_fn):
2790
    """Activate the disks.
2791

2792
    """
2793
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2794
    if not disks_ok:
2795
      raise errors.OpExecError("Cannot activate block devices")
2796

    
2797
    return disks_info
2798

    
2799

    
2800
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2801
  """Prepare the block devices for an instance.
2802

2803
  This sets up the block devices on all nodes.
2804

2805
  @type lu: L{LogicalUnit}
2806
  @param lu: the logical unit on whose behalf we execute
2807
  @type instance: L{objects.Instance}
2808
  @param instance: the instance for whose disks we assemble
2809
  @type ignore_secondaries: boolean
2810
  @param ignore_secondaries: if true, errors on secondary nodes
2811
      won't result in an error return from the function
2812
  @return: a tuple of (disks_ok, device_info), where device_info is a list
2813
      of (host, instance_visible_name, node_visible_name) tuples
2814
      with the mapping from node devices to instance devices
2815

2816
  """
2817
  device_info = []
2818
  disks_ok = True
2819
  iname = instance.name
2820
  # With the two passes mechanism we try to reduce the window of
2821
  # opportunity for the race condition of switching DRBD to primary
2822
  # before handshaking occurred, but we do not eliminate it
2823

    
2824
  # The proper fix would be to wait (with some limits) until the
2825
  # connection has been made and drbd transitions from WFConnection
2826
  # into any other network-connected state (Connected, SyncTarget,
2827
  # SyncSource, etc.)
2828

    
2829
  # 1st pass, assemble on all nodes in secondary mode
2830
  for inst_disk in instance.disks:
2831
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2832
      lu.cfg.SetDiskID(node_disk, node)
2833
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2834
      msg = result.fail_msg
2835
      if msg:
2836
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2837
                           " (is_primary=False, pass=1): %s",
2838
                           inst_disk.iv_name, node, msg)
2839
        if not ignore_secondaries:
2840
          disks_ok = False
2841

    
2842
  # FIXME: race condition on drbd migration to primary
2843

    
2844
  # 2nd pass, do only the primary node
2845
  for inst_disk in instance.disks:
2846
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2847
      if node != instance.primary_node:
2848
        continue
2849
      lu.cfg.SetDiskID(node_disk, node)
2850
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2851
      msg = result.fail_msg
2852
      if msg:
2853
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2854
                           " (is_primary=True, pass=2): %s",
2855
                           inst_disk.iv_name, node, msg)
2856
        disks_ok = False
2857
    device_info.append((instance.primary_node, inst_disk.iv_name,
2858
                        result.payload))
2859

    
2860
  # leave the disks configured for the primary node
2861
  # this is a workaround that would be fixed better by
2862
  # improving the logical/physical id handling
2863
  for disk in instance.disks:
2864
    lu.cfg.SetDiskID(disk, instance.primary_node)
2865

    
2866
  return disks_ok, device_info
2867
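# Illustrative note (editor's addition, hypothetical values): on success the
# function returns something like
#
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
#
# i.e. disks_ok plus one (primary node, instance-visible name, node-visible
# device as returned by blockdev_assemble) tuple per disk; the exact payload
# format depends on the backend and is only assumed here.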

    
2868

    
2869
def _StartInstanceDisks(lu, instance, force):
2870
  """Start the disks of an instance.
2871

2872
  """
2873
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
2874
                                           ignore_secondaries=force)
2875
  if not disks_ok:
2876
    _ShutdownInstanceDisks(lu, instance)
2877
    if force is not None and not force:
2878
      lu.proc.LogWarning("", hint="If the message above refers to a"
2879
                         " secondary node,"
2880
                         " you can retry the operation using '--force'.")
2881
    raise errors.OpExecError("Disk consistency error")
2882

    
2883

    
2884
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s" % pnode)

  if instance.name in ins_l.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem))
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem))


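# A minimal sketch (not part of the original module) showing the typical
# CheckPrereq-time use of _CheckNodeFreeMemory, in the same style as the LUs
# below: the requested amount comes from the instance's filled-in backend
# parameters. The helper name is illustrative only.
def _ExampleMemoryPrereq(lu, instance):
  """Sketch: verify the primary node can hold the instance's memory.

  """
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)

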
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ))
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ))

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


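# A minimal sketch (not part of the original module) of the three-level
# hvparams merge performed in LUStartupInstance.CheckPrereq above: cluster
# defaults, then the instance's own overrides, then the one-shot parameters
# passed to this particular start. Only the function name is invented.
def _ExampleHvparamLayering(lu, instance, start_time_hvparams):
  """Sketch: merge cluster, instance and per-start hypervisor parameters.

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                instance.hvparams)
  filled_hvp.update(start_time_hvparams)
  # syntax check with the final, fully merged dict
  hypervisor.GetHypervisor(instance.hypervisor).CheckParameterSyntax(filled_hvp)
  return filled_hvp

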
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   self.op.pnode)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name), prereq=True)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "serial_no", "hypervisor", "hvparams",] +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.failed or result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "serial_no":
          val = instance.serial_no
        elif field == "network_port":
          val = instance.network_port
        elif field == "hypervisor":
          val = instance.hypervisor
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


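# A minimal sketch (not part of the original module) of how the indexed query
# fields above are resolved: names such as "disk.size/2" or "nic.mac/0" are
# matched by the regular expressions registered in _FIELDS_STATIC and then
# dispatched on their groups. The function name and the example field are
# illustrative only.
def _ExampleFieldMatch(field="disk.size/2"):
  """Sketch: decompose an indexed instance query field.

  """
  st_match = LUQueryInstances._FIELDS_STATIC.Matches(field)
  if st_match and st_match.groups():
    kind, attr, idx = st_match.groups()   # e.g. ("disk", "size", "2")
    return kind, attr, int(idx)
  return None

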
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.op.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.LogWarning("Migration failed and I can't reconnect the"
                      " drives: error '%s'\n"
                      "Please look and recover the instance status" %
                      str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.op.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
    if self.op.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


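# A minimal sketch (not part of the original module) that condenses the DRBD
# transitions driven by LUMigrateInstance._ExecMigration above into a single
# sequence, so the ordering is easy to follow. "lu" is assumed to be a
# LUMigrateInstance whose source_node/target_node attributes were already set
# up by Exec; the helper name is invented and the hypervisor-level migration
# itself is elided.
def _ExampleMigrationDiskTransitions(lu):
  """Sketch: dual-master for the live copy, then back to single-master.

  """
  lu._EnsureSecondary(lu.target_node)  # make sure the target is secondary
  lu._GoStandalone()                   # drop the DRBD network connection
  lu._GoReconnect(True)                # reconnect in dual-master mode
  lu._WaitUntilSync()
  # ... the actual instance migration happens here ...
  lu._EnsureSecondary(lu.source_node)  # the old primary becomes secondary
  lu._WaitUntilSync()
  lu._GoStandalone()
  lu._GoReconnect(False)               # back to single-master mode
  lu._WaitUntilSync()

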
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
4315
  """Generate a suitable LV name.
4316

4317
  This will generate a logical volume name for the given instance.
4318

4319
  """
4320
  results = []
4321
  for val in exts:
4322
    new_id = lu.cfg.GenerateUniqueID()
4323
    results.append("%s%s" % (new_id, val))
4324
  return results
4325

    
4326

    
4327
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
4328
                         p_minor, s_minor):
4329
  """Generate a drbd8 device complete with its children.
4330

4331
  """
4332
  port = lu.cfg.AllocatePort()
4333
  vgname = lu.cfg.GetVGName()
4334
  shared_secret = lu.cfg.GenerateDRBDSecret()
4335
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
4336
                          logical_id=(vgname, names[0]))
4337
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
4338
                          logical_id=(vgname, names[1]))
4339
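  # The DRBD8 logical_id packs everything the two peers need to find each
  # other: both node names, the allocated network port, the local and remote
  # minors and the shared secret; the data and meta LVs become its children.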
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
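    # names and minors are interleaved per disk: entries 2*idx and 2*idx+1
    # are the data/meta LV names (and primary/secondary minors) of disk idx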
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
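      # the device is created and opened in 'force' mode only on the primary
      # node; on the secondaries only what CreateOnSecondary() requires is
      # actually created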
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    msg = result.fail_msg
    if msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
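  # For example, two 1024 MB disks under DT_DRBD8 need
  # 2 * (1024 + 128) = 2304 MB of free space in the volume group.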
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
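    # results from offline nodes are ignored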
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
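        # no source node was given: scan the exports of all locked nodes for
        # one matching the (relative) source path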
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    _DiskReplacer.CheckArguments(self.op.mode, self.op.remote_node,
                                 self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = _DiskReplacer(self, self.op.instance_name, self.op.mode,
                                  self.op.iallocator, self.op.remote_node,
                                  self.op.disks)

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.replacer.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    self.replacer.Exec()


class _DiskReplacer:
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Initializes this class.

    """
    # Parameters
    self.lu = lu
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    # check for valid parameter combination
    cnt = [remote_node, iallocator].count(None)
    if mode == constants.REPLACE_DISK_CHG:
      if cnt == 2:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")
      elif cnt == 0:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
    else: # not replacing the secondary
      if cnt != 2:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " be used only when changing the"
                                   " secondary node")

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info))

    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if self.instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(self.instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(self.instance.secondary_nodes))

    secondary_node = self.instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       self.instance.name, secondary_node)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.mode == constants.REPLACE_DISK_PRI:
      self.target_node = self.instance.primary_node
      self.other_node = secondary_node
      check_nodes = [self.target_node, self.other_node]

    elif self.mode == constants.REPLACE_DISK_SEC:
      self.target_node = secondary_node
      self.other_node = self.instance.primary_node
      check_nodes = [self.target_node, self.other_node]

    elif self.mode == constants.REPLACE_DISK_CHG:
      self.new_node = remote_node
      self.other_node = self.instance.primary_node
      self.target_node = secondary_node
      check_nodes = [self.new_node, self.other_node]

      _CheckNodeNotDrained(self.lu, remote_node)

    else:
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                   self.mode)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    # Check whether disks are valid
    for disk_idx in self.disks:
      self.instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      if self.mode == constants.REPLACE_DISK_CHG:
        return self._ExecDrbd8Secondary()
      else:
        return self._ExecDrbd8DiskOnly()

    finally:
      # Deactivate the instance disks if we're replacing them on a down
      # instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

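      # the replacement pair follows the usual DRBD layout: a data LV of the
      # disk's size plus a 128 MB metadata LV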
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

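      # payload[5] of the blockdev_find result is the 'degraded' flag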
      if result.payload[5]:
5549
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5550

    
5551
  def _RemoveOldStorage(self, node_name, iv_names):
5552
    for name, (dev, old_lvs, _) in iv_names.iteritems():
5553
      self.lu.LogInfo("Remove logical volumes for %s" % name)
5554

    
5555
      for lv in old_lvs:
5556
        self.cfg.SetDiskID(lv, node_name)
5557

    
5558
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
5559
        if msg:
5560
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
5561
                             hint="remove unused LVs manually")
5562

    
5563
  def _ExecDrbd8DiskOnly(self):
5564
    """Replace a disk on the primary or secondary for DRBD 8.
5565

5566
    The algorithm for replace is quite complicated:
5567

5568
      1. for each disk to be replaced:
5569

5570
        1. create new LVs on the target node with unique names
5571
        1. detach old LVs from the drbd device
5572
        1. rename old LVs to name_replaced.<time_t>
5573
        1. rename new LVs to old LVs
5574
        1. attach the new LVs (with the old names now) to the drbd device
5575

5576
      1. wait for sync across all devices
5577

5578
      1. for each modified disk:
5579

5580
        1. remove old LVs (which have the name name_replaces.<time_t>)
5581

5582
    Failures are not very well handled.
5583

5584
    """
5585
    steps_total = 6
5586

    
5587
    # Step: check device activation
5588
    self.lu.LogStep(1, steps_total, "Check device existence")
5589
    self._CheckDisksExistence([self.other_node, self.target_node])
5590
    self._CheckVolumeGroup([self.target_node, self.other_node])
5591

    
5592
    # Step: check other node consistency
5593
    self.lu.LogStep(2, steps_total, "Check peer consistency")
5594
    self._CheckDisksConsistency(self.other_node,
5595
                                self.other_node == self.instance.primary_node,
5596
                                False)
5597

    
5598
    # Step: create new storage
5599
    self.lu.LogStep(3, steps_total, "Allocate new storage")
5600
    iv_names = self._CreateNewStorage(self.target_node)
5601

    
5602
    # Step: for each lv, detach+rename*2+attach
5603
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
5604
    for dev, old_lvs, new_lvs in iv_names.itervalues():
5605
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
5606

    
5607
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev, old_lvs)
5608
      result.Raise("Can't detach drbd from local storage on node"
5609
                   " %s for device %s" % (self.target_node, dev.iv_name))
5610
      #dev.children = []
5611
      #cfg.Update(instance)
5612

    
5613
      # ok, we created the new LVs, so now we know we have the needed
5614
      # storage; as such, we proceed on the target node to rename
5615
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5616
      # using the assumption that logical_id == physical_id (which in
5617
      # turn is the unique_id on that node)
5618

    
5619
      # FIXME(iustin): use a better name for the replaced LVs
5620
      temp_suffix = int(time.time())
5621
      ren_fn = lambda d, suff: (d.physical_id[0],
5622
                                d.physical_id[1] + "_replaced-%s" % suff)
5623

    
5624
      # Build the rename list based on what LVs exist on the node
5625
      rename_old_to_new = []
5626
      for to_ren in old_lvs:
5627
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
5628
        if not result.fail_msg and result.payload:
5629
          # device exists
5630
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
5631

    
5632
      self.lu.LogInfo("Renaming the old LVs on the target node")
5633
      result = self.rpc.call_blockdev_rename(self.target_node, rename_old_to_new)
5634
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
5635

    
5636
      # Now we rename the new LVs to the old LVs
5637
      self.lu.LogInfo("Renaming the new LVs on the target node")
5638
      rename_new_to_old = [(new, old.physical_id)
5639
                           for old, new in zip(old_lvs, new_lvs)]
5640
      result = self.rpc.call_blockdev_rename(self.target_node, rename_new_to_old)
5641
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
5642

    
5643
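      # update the in-memory disk objects: the new LVs take over the old
      # logical ids, while the old LVs get the temporary _replaced names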
      for old, new in zip(old_lvs, new_lvs):
5644
        new.logical_id = old.logical_id
5645
        self.cfg.SetDiskID(new, self.target_node)
5646

    
5647
      for disk in old_lvs:
5648
        disk.logical_id = ren_fn(disk, temp_suffix)
5649
        self.cfg.SetDiskID(disk, self.target_node)
5650

    
5651
      # Now that the new lvs have the old name, we can add them to the device
5652
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
5653
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev, new_lvs)
5654
      msg = result.fail_msg
5655
      if msg:
5656
        for new_lv in new_lvs:
5657
          msg2 = self.rpc.call_blockdev_remove(self.target_node, new_lv).fail_msg
5658
          if msg2:
5659
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
5660
                               hint=("cleanup manually the unused logical"
5661
                                     "volumes"))
5662
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
5663

    
5664
      dev.children = new_lvs
5665

    
5666
      self.cfg.Update(self.instance)
5667

    
5668
    # Wait for sync
5669
    # This can fail as the old devices are degraded and _WaitForSync
5670
    # does a combined result over all disks, so we don't check its return value
5671
    self.lu.LogStep(5, steps_total, "Sync devices")
5672
    _WaitForSync(self.lu, self.instance, unlock=True)
5673

    
5674
    # Check all devices manually
5675
    self._CheckDevices(self.instance.primary_node, iv_names)
5676

    
5677
    # Step: remove old storage
5678
    self.lu.LogStep(6, steps_total, "Removing old storage")
5679
    self._RemoveOldStorage(self.target_node, iv_names)
5680

    
5681
  def _ExecDrbd8Secondary(self):
5682
    """Replace the secondary node for DRBD 8.
5683

5684
    The algorithm for replace is quite complicated:
5685
      - for all disks of the instance:
5686
        - create new LVs on the new node with same names
5687
        - shutdown the drbd device on the old secondary
5688
        - disconnect the drbd network on the primary
5689
        - create the drbd device on the new secondary
5690
        - network attach the drbd on the primary, using an artifice:
5691
          the drbd code for Attach() will connect to the network if it
5692
          finds a device which is connected to the correct local disks but
5693
          not network enabled
5694
      - wait for sync across all devices
5695
      - remove all disks from the old secondary
5696

5697
    Failures are not very well handled.
5698

5699
    """
5700
    steps_total = 6
5701

    
5702
    # Step: check device activation
5703
    self.lu.LogStep(1, steps_total, "Check device existence")
5704
    self._CheckDisksExistence([self.instance.primary_node])
5705
    self._CheckVolumeGroup([self.instance.primary_node])
5706

    
5707
    # Step: check other node consistency
5708
    self.lu.LogStep(2, steps_total, "Check peer consistency")
5709
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
5710

    
5711
    # Step: create new storage
5712
    self.lu.LogStep(3, steps_total, "Allocate new storage")
5713
    for idx, dev in enumerate(self.instance.disks):
5714
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
5715
                      (self.new_node, idx))
5716
      # we pass force_create=True to force LVM creation
5717
      for new_lv in dev.children:
5718
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
5719
                        _GetInstanceInfoText(self.instance), False)
5720

    
5721
    # Step 4: drbd minors and drbd setup changes
5722
    # after this, we must manually remove the drbd minors on both the
5723
    # error and the success paths
5724
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
5725
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
5727
    logging.debug("Allocated minors %r", minors)
5728

    
5729
    iv_names = {}
5730
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
5731
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
5732
      # create new devices on new_node; note that we create two IDs:
5733
      # one without port, so the drbd will be activated without
5734
      # networking information on the new node at this stage, and one
5735
      # with network, for the latter activation in step 4
5736
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
5737
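      # keep the minor used on the primary node; the newly allocated minor
      # is used on the new secondary node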
      if self.instance.primary_node == o_node1:
5738
        p_minor = o_minor1
5739
      else:
5740
        p_minor = o_minor2
5741

    
5742
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
5743
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
5744

    
5745
      iv_names[idx] = (dev, dev.children, new_net_id)
5746
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
5747
                    new_net_id)
5748
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
5749
                              logical_id=new_alone_id,
5750
                              children=dev.children,
5751
                              size=dev.size)
5752
      try:
5753
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
5754
                              _GetInstanceInfoText(self.instance), False)
5755
      except errors.GenericError:
5756
        self.cfg.ReleaseDRBDMinors(self.instance.name)
5757
        raise
5758

    
5759
    # We have new devices, shutdown the drbd on the old secondary
5760
    for idx, dev in enumerate(self.instance.disks):
5761
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
5762
      self.cfg.SetDiskID(dev, self.target_node)
5763
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
5764
      if msg:
5765
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
5766
                           "node: %s" % (idx, msg),
5767
                           hint=("Please cleanup this device manually as"
5768
                                 " soon as possible"))
5769

    
5770
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
5771
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]
5773

    
5774
    msg = result.fail_msg
5775
    if msg:
5776
      # detaches didn't succeed (unlikely)
5777
      self.cfg.ReleaseDRBDMinors(self.instance.name)
5778
      raise errors.OpExecError("Can't detach the disks from the network on"
5779
                               " old node: %s" % (msg,))
5780

    
5781
    # if we managed to detach at least one, we update all the disks of
5782
    # the instance to point to the new secondary
5783
    self.lu.LogInfo("Updating instance configuration")
5784
    for dev, _, new_logical_id in iv_names.itervalues():
5785
      dev.logical_id = new_logical_id
5786
      self.cfg.SetDiskID(dev, self.instance.primary_node)
5787

    
5788
    self.cfg.Update(self.instance)
5789

    
5790
    # and now perform the drbd attach
5791
    self.lu.LogInfo("Attaching primary drbds to new secondary"
5792
                    " (standalone => connected)")
5793
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
5796
    for to_node, to_result in result.items():
5797
      msg = to_result.fail_msg
5798
      if msg:
5799
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
5800
                           hint=("please do a gnt-instance info to see the"
5801
                                 " status of disks"))
5802

    
5803
    # Wait for sync
5804
    # This can fail as the old devices are degraded and _WaitForSync
5805
    # does a combined result over all disks, so we don't check its return value
5806
    self.lu.LogStep(5, steps_total, "Sync devices")
5807
    _WaitForSync(self.lu, self.instance, unlock=True)
5808

    
5809
    # Check all devices manually
5810
    self._CheckDevices(self.instance.primary_node, iv_names)
5811

    
5812
    # Step: remove old storage
5813
    self.lu.LogStep(6, steps_total, "Removing old storage")
5814
    self._RemoveOldStorage(self.target_node, iv_names)
5815

    
5816

    
5817
class LUGrowDisk(LogicalUnit):
5818
  """Grow a disk of an instance.
5819

5820
  """
5821
  HPATH = "disk-grow"
5822
  HTYPE = constants.HTYPE_INSTANCE
5823
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
5824
  REQ_BGL = False
5825

    
5826
  def ExpandNames(self):
5827
    self._ExpandAndLockInstance()
5828
    self.needed_locks[locking.LEVEL_NODE] = []
5829
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5830

    
5831
  def DeclareLocks(self, level):
5832
    if level == locking.LEVEL_NODE:
5833
      self._LockInstancesNodes()
5834

    
5835
  def BuildHooksEnv(self):
5836
    """Build hooks env.
5837

5838
    This runs on the master, the primary and all the secondaries.
5839

5840
    """
5841
    env = {
5842
      "DISK": self.op.disk,
5843
      "AMOUNT": self.op.amount,
5844
      }
5845
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5846
    nl = [
5847
      self.cfg.GetMasterNode(),
5848
      self.instance.primary_node,
5849
      ]
5850
    return env, nl, nl
5851

    
5852
  def CheckPrereq(self):
5853
    """Check prerequisites.
5854

5855
    This checks that the instance is in the cluster.
5856

5857
    """
5858
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5859
    assert instance is not None, \
5860
      "Cannot retrieve locked instance %s" % self.op.instance_name
5861
    nodenames = list(instance.all_nodes)
5862
    for node in nodenames:
5863
      _CheckNodeOnline(self, node)
5864

    
5865

    
5866
    self.instance = instance
5867

    
5868
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
5869
      raise errors.OpPrereqError("Instance's disk layout does not support"
5870
                                 " growing.")
5871

    
5872
    self.disk = instance.FindDisk(self.op.disk)
5873

    
5874
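    # check that every node holding the disk has enough space in the VG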
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5875
                                       instance.hypervisor)
5876
    for node in nodenames:
5877
      info = nodeinfo[node]
5878
      info.Raise("Cannot get current information from node %s" % node)
5879
      vg_free = info.payload.get('vg_free', None)
5880
      if not isinstance(vg_free, int):
5881
        raise errors.OpPrereqError("Can't compute free disk space on"
5882
                                   " node %s" % node)
5883
      if self.op.amount > vg_free:
5884
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
5885
                                   " %d MiB available, %d MiB required" %
5886
                                   (node, vg_free, self.op.amount))
5887

    
5888
  def Exec(self, feedback_fn):
5889
    """Execute disk grow.
5890

5891
    """
5892
    instance = self.instance
5893
    disk = self.disk
5894
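    # the disk has to be grown on all nodes holding it (for DRBD that means
    # both the primary and the secondary node)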
    for node in instance.all_nodes:
5895
      self.cfg.SetDiskID(disk, node)
5896
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
5897
      result.Raise("Grow request failed to node %s" % node)
5898
    disk.RecordGrow(self.op.amount)
5899
    self.cfg.Update(instance)
5900
    if self.op.wait_for_sync:
5901
      disk_abort = not _WaitForSync(self, instance)
5902
      if disk_abort:
5903
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
5904
                             " status.\nPlease check the instance.")
5905

    
5906

    
5907
class LUQueryInstanceData(NoHooksLU):
5908
  """Query runtime instance data.
5909

5910
  """
5911
  _OP_REQP = ["instances", "static"]
5912
  REQ_BGL = False
5913

    
5914
  def ExpandNames(self):
5915
    self.needed_locks = {}
5916
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
5917

    
5918
    if not isinstance(self.op.instances, list):
5919
      raise errors.OpPrereqError("Invalid argument type 'instances'")
5920

    
5921
    if self.op.instances:
5922
      self.wanted_names = []
5923
      for name in self.op.instances:
5924
        full_name = self.cfg.ExpandInstanceName(name)
5925
        if full_name is None:
5926
          raise errors.OpPrereqError("Instance '%s' not known" % name)
5927
        self.wanted_names.append(full_name)
5928
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
5929
    else:
5930
      self.wanted_names = None
5931
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5932

    
5933
    self.needed_locks[locking.LEVEL_NODE] = []
5934
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5935

    
5936
  def DeclareLocks(self, level):
5937
    if level == locking.LEVEL_NODE:
5938
      self._LockInstancesNodes()
5939

    
5940
  def CheckPrereq(self):
5941
    """Check prerequisites.
5942

5943
    This only checks the optional instance list against the existing names.
5944

5945
    """
5946
    if self.wanted_names is None:
5947
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5948

    
5949
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
5950
                             in self.wanted_names]
5951
    return
5952

    
5953
  def _ComputeDiskStatus(self, instance, snode, dev):
5954
    """Compute block device status.
5955

5956
    """
5957
    static = self.op.static
5958
    if not static:
5959
      self.cfg.SetDiskID(dev, instance.primary_node)
5960
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
5961
      if dev_pstatus.offline:
5962
        dev_pstatus = None
5963
      else:
5964
        dev_pstatus.Raise("Can't compute disk status for %s" % instance.name)
5965
        dev_pstatus = dev_pstatus.payload
5966
    else:
5967
      dev_pstatus = None
5968

    
5969
    if dev.dev_type in constants.LDS_DRBD:
5970
      # we change the snode then (otherwise we use the one passed in)
5971
      if dev.logical_id[0] == instance.primary_node:
5972
        snode = dev.logical_id[1]
5973
      else:
5974
        snode = dev.logical_id[0]
5975

    
5976
    if snode and not static:
5977
      self.cfg.SetDiskID(dev, snode)
5978
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
5979
      if dev_sstatus.offline:
5980
        dev_sstatus = None
5981
      else:
5982
        dev_sstatus.Raise("Can't compute disk status for %s" % instance.name)
5983
        dev_sstatus = dev_sstatus.payload
5984
    else:
5985
      dev_sstatus = None
5986

    
5987
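    # recurse into the child devices (e.g. the LVs backing a DRBD disk)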
    if dev.children:
5988
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
5989
                      for child in dev.children]
5990
    else:
5991
      dev_children = []
5992

    
5993
    data = {
5994
      "iv_name": dev.iv_name,
5995
      "dev_type": dev.dev_type,
5996
      "logical_id": dev.logical_id,
5997
      "physical_id": dev.physical_id,
5998
      "pstatus": dev_pstatus,
5999
      "sstatus": dev_sstatus,
6000
      "children": dev_children,
6001
      "mode": dev.mode,
6002
      "size": dev.size,
6003
      }
6004

    
6005
    return data
6006

    
6007
  def Exec(self, feedback_fn):
6008
    """Gather and return data"""
6009
    result = {}
6010

    
6011
    cluster = self.cfg.GetClusterInfo()
6012

    
6013
    for instance in self.wanted_instances:
6014
      if not self.op.static:
6015
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6016
                                                  instance.name,
6017
                                                  instance.hypervisor)
6018
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6019
        remote_info = remote_info.payload
6020
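        # a non-empty payload containing a "state" key means the hypervisor
        # reported the instance as running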
        if remote_info and "state" in remote_info:
6021
          remote_state = "up"
6022
        else:
6023
          remote_state = "down"
6024
      else:
6025
        remote_state = None
6026
      if instance.admin_up:
6027
        config_state = "up"
6028
      else:
6029
        config_state = "down"
6030

    
6031
      disks = [self._ComputeDiskStatus(instance, None, device)
6032
               for device in instance.disks]
6033

    
6034
      idict = {
6035
        "name": instance.name,
6036
        "config_state": config_state,
6037
        "run_state": remote_state,
6038
        "pnode": instance.primary_node,
6039
        "snodes": instance.secondary_nodes,
6040
        "os": instance.os,
6041
        # this happens to be the same format used for hooks
6042
        "nics": _NICListToTuple(self, instance.nics),
6043
        "disks": disks,
6044
        "hypervisor": instance.hypervisor,
6045
        "network_port": instance.network_port,
6046
        "hv_instance": instance.hvparams,
6047
        "hv_actual": cluster.FillHV(instance),
6048
        "be_instance": instance.beparams,
6049
        "be_actual": cluster.FillBE(instance),
6050
        }
6051

    
6052
      result[instance.name] = idict
6053

    
6054
    return result
6055

    
6056

    
6057
class LUSetInstanceParams(LogicalUnit):
6058
  """Modifies an instances's parameters.
6059

6060
  """
6061
  HPATH = "instance-modify"
6062
  HTYPE = constants.HTYPE_INSTANCE
6063
  _OP_REQP = ["instance_name"]
6064
  REQ_BGL = False
6065

    
6066
  def CheckArguments(self):
6067
    if not hasattr(self.op, 'nics'):
6068
      self.op.nics = []
6069
    if not hasattr(self.op, 'disks'):
6070
      self.op.disks = []
6071
    if not hasattr(self.op, 'beparams'):
6072
      self.op.beparams = {}
6073
    if not hasattr(self.op, 'hvparams'):
6074
      self.op.hvparams = {}
6075
    self.op.force = getattr(self.op, "force", False)
6076
    if not (self.op.nics or self.op.disks or
6077
            self.op.hvparams or self.op.beparams):
6078
      raise errors.OpPrereqError("No changes submitted")
6079

    
6080
    # Disk validation
6081
    disk_addremove = 0
6082
    for disk_op, disk_dict in self.op.disks:
6083
      if disk_op == constants.DDM_REMOVE:
6084
        disk_addremove += 1
6085
        continue
6086
      elif disk_op == constants.DDM_ADD:
6087
        disk_addremove += 1
6088
      else:
6089
        if not isinstance(disk_op, int):
6090
          raise errors.OpPrereqError("Invalid disk index")
6091
        if not isinstance(disk_dict, dict):
6092
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
6093
          raise errors.OpPrereqError(msg)
6094

    
6095
      if disk_op == constants.DDM_ADD:
6096
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
6097
        if mode not in constants.DISK_ACCESS_SET:
6098
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
6099
        size = disk_dict.get('size', None)
6100
        if size is None:
6101
          raise errors.OpPrereqError("Required disk parameter size missing")
6102
        try:
6103
          size = int(size)
6104
        except ValueError, err:
6105
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
6106
                                     str(err))
6107
        disk_dict['size'] = size
6108
      else:
6109
        # modification of disk
6110
        if 'size' in disk_dict:
6111
          raise errors.OpPrereqError("Disk size change not possible, use"
6112
                                     " grow-disk")
6113

    
6114
    if disk_addremove > 1:
6115
      raise errors.OpPrereqError("Only one disk add or remove operation"
6116
                                 " supported at a time")
6117

    
6118
    # NIC validation
6119
    nic_addremove = 0
6120
    for nic_op, nic_dict in self.op.nics:
6121
      if nic_op == constants.DDM_REMOVE:
6122
        nic_addremove += 1
6123
        continue
6124
      elif nic_op == constants.DDM_ADD:
6125
        nic_addremove += 1
6126
      else:
6127
        if not isinstance(nic_op, int):
6128
          raise errors.OpPrereqError("Invalid nic index")
6129
        if not isinstance(nic_dict, dict):
6130
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
6131
          raise errors.OpPrereqError(msg)
6132

    
6133
      # nic_dict should be a dict
6134
      nic_ip = nic_dict.get('ip', None)
6135
      if nic_ip is not None:
6136
        if nic_ip.lower() == constants.VALUE_NONE:
6137
          nic_dict['ip'] = None
6138
        else:
6139
          if not utils.IsValidIP(nic_ip):
6140
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
6141

    
6142
      nic_bridge = nic_dict.get('bridge', None)
6143
      nic_link = nic_dict.get('link', None)
6144
      if nic_bridge and nic_link:
6145
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6146
                                   " at the same time")
6147
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
6148
        nic_dict['bridge'] = None
6149
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
6150
        nic_dict['link'] = None
6151

    
6152
      if nic_op == constants.DDM_ADD:
6153
        nic_mac = nic_dict.get('mac', None)
6154
        if nic_mac is None:
6155
          nic_dict['mac'] = constants.VALUE_AUTO
6156

    
6157
      if 'mac' in nic_dict:
6158
        nic_mac = nic_dict['mac']
6159
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6160
          if not utils.IsValidMac(nic_mac):
6161
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
6162
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
6163
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
6164
                                     " modifying an existing nic")
6165

    
6166
    if nic_addremove > 1:
6167
      raise errors.OpPrereqError("Only one NIC add or remove operation"
6168
                                 " supported at a time")
6169

    
6170
  def ExpandNames(self):
6171
    self._ExpandAndLockInstance()
6172
    self.needed_locks[locking.LEVEL_NODE] = []
6173
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6174

    
6175
  def DeclareLocks(self, level):
6176
    if level == locking.LEVEL_NODE:
6177
      self._LockInstancesNodes()
6178

    
6179
  def BuildHooksEnv(self):
6180
    """Build hooks env.
6181

6182
    This runs on the master, primary and secondaries.
6183

6184
    """
6185
    args = dict()
6186
    if constants.BE_MEMORY in self.be_new:
6187
      args['memory'] = self.be_new[constants.BE_MEMORY]
6188
    if constants.BE_VCPUS in self.be_new:
6189
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
6190
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
6191
    # information at all.
6192
    if self.op.nics:
6193
      args['nics'] = []
6194
      nic_override = dict(self.op.nics)
6195
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
6196
      for idx, nic in enumerate(self.instance.nics):
6197
        if idx in nic_override:
6198
          this_nic_override = nic_override[idx]
6199
        else:
6200
          this_nic_override = {}
6201
        if 'ip' in this_nic_override:
6202
          ip = this_nic_override['ip']
6203
        else:
6204
          ip = nic.ip
6205
        if 'mac' in this_nic_override:
6206
          mac = this_nic_override['mac']
6207
        else:
6208
          mac = nic.mac
6209
        if idx in self.nic_pnew:
6210
          nicparams = self.nic_pnew[idx]
6211
        else:
6212
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6213
        mode = nicparams[constants.NIC_MODE]
6214
        link = nicparams[constants.NIC_LINK]
6215
        args['nics'].append((ip, mac, mode, link))
6216
      if constants.DDM_ADD in nic_override:
6217
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6218
        mac = nic_override[constants.DDM_ADD]['mac']
6219
        nicparams = self.nic_pnew[constants.DDM_ADD]
6220
        mode = nicparams[constants.NIC_MODE]
6221
        link = nicparams[constants.NIC_LINK]
6222
        args['nics'].append((ip, mac, mode, link))
6223
      elif constants.DDM_REMOVE in nic_override:
6224
        del args['nics'][-1]
6225

    
6226
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6227
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6228
    return env, nl, nl
6229

    
6230
  def _GetUpdatedParams(self, old_params, update_dict,
6231
                        default_values, parameter_types):
6232
    """Return the new params dict for the given params.
6233

6234
    @type old_params: dict
6235
    @param old_params: old parameters
6236
    @type update_dict: dict
6237
    @param update_dict: dict containing new parameter values,
6238
                        or constants.VALUE_DEFAULT to reset the
6239
                        parameter to its default value
6240
    @type default_values: dict
6241
    @param default_values: default values for the filled parameters
6242
    @type parameter_types: dict
6243
    @param parameter_types: dict mapping target dict keys to types
6244
                            in constants.ENFORCEABLE_TYPES
6245
    @rtype: (dict, dict)
6246
    @return: (new_parameters, filled_parameters)
6247

6248
    """
6249
    params_copy = copy.deepcopy(old_params)
6250
    for key, val in update_dict.iteritems():
6251
      if val == constants.VALUE_DEFAULT:
6252
        try:
6253
          del params_copy[key]
6254
        except KeyError:
6255
          pass
6256
      else:
6257
        params_copy[key] = val
6258
    utils.ForceDictType(params_copy, parameter_types)
6259
    params_filled = objects.FillDict(default_values, params_copy)
6260
    return (params_copy, params_filled)
6261

    
6262
  def CheckPrereq(self):
6263
    """Check prerequisites.
6264

6265
    This only checks the instance list against the existing names.
6266

6267
    """
6268
    self.force = self.op.force
6269

    
6270
    # checking the new params on the primary/secondary nodes
6271

    
6272
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6273
    cluster = self.cluster = self.cfg.GetClusterInfo()
6274
    assert self.instance is not None, \
6275
      "Cannot retrieve locked instance %s" % self.op.instance_name
6276
    pnode = instance.primary_node
6277
    nodelist = list(instance.all_nodes)
6278

    
6279
    # hvparams processing
6280
    if self.op.hvparams:
6281
      i_hvdict, hv_new = self._GetUpdatedParams(
6282
                             instance.hvparams, self.op.hvparams,
6283
                             cluster.hvparams[instance.hypervisor],
6284
                             constants.HVS_PARAMETER_TYPES)
6285
      # local check
6286
      hypervisor.GetHypervisor(
6287
        instance.hypervisor).CheckParameterSyntax(hv_new)
6288
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6289
      self.hv_new = hv_new # the new actual values
6290
      self.hv_inst = i_hvdict # the new dict (without defaults)
6291
    else:
6292
      self.hv_new = self.hv_inst = {}
6293

    
6294
    # beparams processing
6295
    if self.op.beparams:
6296
      i_bedict, be_new = self._GetUpdatedParams(
6297
                             instance.beparams, self.op.beparams,
6298
                             cluster.beparams[constants.PP_DEFAULT],
6299
                             constants.BES_PARAMETER_TYPES)
6300
      self.be_new = be_new # the new actual values
6301
      self.be_inst = i_bedict # the new dict (without defaults)
6302
    else:
6303
      self.be_new = self.be_inst = {}
6304

    
6305
    self.warn = []
6306

    
6307
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6308
      mem_check_list = [pnode]
6309
      if be_new[constants.BE_AUTO_BALANCE]:
6310
        # either we changed auto_balance to yes or it was from before
6311
        mem_check_list.extend(instance.secondary_nodes)
6312
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6313
                                                  instance.hypervisor)
6314
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6315
                                         instance.hypervisor)
6316
      pninfo = nodeinfo[pnode]
6317
      msg = pninfo.fail_msg
6318
      if msg:
6319
        # Assume the primary node is unreachable and go ahead
6320
        self.warn.append("Can't get info from primary node %s: %s" %
6321
                         (pnode, msg))
6322
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
6323
        self.warn.append("Node data from primary node %s doesn't contain"
6324
                         " free memory information" % pnode)
6325
      elif instance_info.fail_msg:
6326
        self.warn.append("Can't get instance runtime information: %s" %
6327
                        instance_info.fail_msg)
6328
      else:
6329
        if instance_info.payload:
6330
          current_mem = int(instance_info.payload['memory'])
6331
        else:
6332
          # Assume instance not running
6333
          # (there is a slight race condition here, but it's not very probable,
6334
          # and we have no other way to check)
6335
          current_mem = 0
6336
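        # memory still missing on the primary node: the new memory size,
        # minus what the instance uses now, minus the node's free memory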
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6337
                    pninfo.payload['memory_free'])
6338
        if miss_mem > 0:
6339
          raise errors.OpPrereqError("This change will prevent the instance"
6340
                                     " from starting, due to %d MB of memory"
6341
                                     " missing on its primary node" % miss_mem)
6342

    
6343
      if be_new[constants.BE_AUTO_BALANCE]:
6344
        for node, nres in nodeinfo.items():
6345
          if node not in instance.secondary_nodes:
6346
            continue
6347
          msg = nres.fail_msg
6348
          if msg:
6349
            self.warn.append("Can't get info from secondary node %s: %s" %
6350
                             (node, msg))
6351
          elif not isinstance(nres.payload.get('memory_free', None), int):
6352
            self.warn.append("Secondary node %s didn't return free"
6353
                             " memory information" % node)
6354
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
6355
            self.warn.append("Not enough memory to failover instance to"
6356
                             " secondary node %s" % node)
6357

    
6358
    # NIC processing
6359
    self.nic_pnew = {}
6360
    self.nic_pinst = {}
6361
    for nic_op, nic_dict in self.op.nics:
6362
      if nic_op == constants.DDM_REMOVE:
6363
        if not instance.nics:
6364
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6365
        continue
6366
      if nic_op != constants.DDM_ADD:
6367
        # an existing nic
6368
        if nic_op < 0 or nic_op >= len(instance.nics):
6369
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6370
                                     " are 0 to %d" %
6371
                                     (nic_op, len(instance.nics)))
6372
        old_nic_params = instance.nics[nic_op].nicparams
6373
        old_nic_ip = instance.nics[nic_op].ip
6374
      else:
6375
        old_nic_params = {}
6376
        old_nic_ip = None
6377

    
6378
      update_params_dict = dict([(key, nic_dict[key])
6379
                                 for key in constants.NICS_PARAMETERS
6380
                                 if key in nic_dict])
6381

    
6382
      if 'bridge' in nic_dict:
6383
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6384

    
6385
      new_nic_params, new_filled_nic_params = \
6386
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6387
                                 cluster.nicparams[constants.PP_DEFAULT],
6388
                                 constants.NICS_PARAMETER_TYPES)
6389
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6390
      self.nic_pinst[nic_op] = new_nic_params
6391
      self.nic_pnew[nic_op] = new_filled_nic_params
6392
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6393

    
6394
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6395
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6396
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
6397
        if msg:
6398
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
6399
          if self.force:
6400
            self.warn.append(msg)
6401
          else:
6402
            raise errors.OpPrereqError(msg)
6403
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6404
        if 'ip' in nic_dict:
6405
          nic_ip = nic_dict['ip']
6406
        else:
6407
          nic_ip = old_nic_ip
6408
        if nic_ip is None:
6409
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6410
                                     ' on a routed nic')
6411
      if 'mac' in nic_dict:
6412
        nic_mac = nic_dict['mac']
6413
        if nic_mac is None:
6414
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6415
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6416
          # otherwise generate the mac
6417
          nic_dict['mac'] = self.cfg.GenerateMAC()
6418
        else:
6419
          # or validate/reserve the current one
6420
          if self.cfg.IsMacInUse(nic_mac):
6421
            raise errors.OpPrereqError("MAC address %s already in use"
6422
                                       " in cluster" % nic_mac)
6423

    
6424
    # DISK processing
6425
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6426
      raise errors.OpPrereqError("Disk operations not supported for"
6427
                                 " diskless instances")
6428
    for disk_op, disk_dict in self.op.disks:
6429
      if disk_op == constants.DDM_REMOVE:
6430
        if len(instance.disks) == 1:
6431
          raise errors.OpPrereqError("Cannot remove the last disk of"
6432
                                     " an instance")
6433
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6434
        ins_l = ins_l[pnode]
6435
        msg = ins_l.fail_msg
6436
        if msg:
6437
          raise errors.OpPrereqError("Can't contact node %s: %s" %
6438
                                     (pnode, msg))
6439
        if instance.name in ins_l.payload:
6440
          raise errors.OpPrereqError("Instance is running, can't remove"
6441
                                     " disks.")
6442

    
6443
      if (disk_op == constants.DDM_ADD and
6444
          len(instance.disks) >= constants.MAX_DISKS):
6445
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6446
                                   " add more" % constants.MAX_DISKS)
6447
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6448
        # an existing disk
6449
        if disk_op < 0 or disk_op >= len(instance.disks):
6450
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6451
                                     " are 0 to %d" %
6452
                                     (disk_op, len(instance.disks)))
6453

    
6454
    return
6455

    
6456
  def Exec(self, feedback_fn):
6457
    """Modifies an instance.
6458

6459
    All parameters take effect only at the next restart of the instance.
6460

6461
    """
6462
    # Process here the warnings from CheckPrereq, as we don't have a
6463
    # feedback_fn there.
6464
    for warn in self.warn:
6465
      feedback_fn("WARNING: %s" % warn)
6466

    
6467
    result = []
6468
    instance = self.instance
6469
    cluster = self.cluster
6470
    # disk changes
6471
    for disk_op, disk_dict in self.op.disks:
6472
      if disk_op == constants.DDM_REMOVE:
6473
        # remove the last disk
6474
        device = instance.disks.pop()
6475
        device_idx = len(instance.disks)
6476
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6477
          self.cfg.SetDiskID(disk, node)
6478
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
6479
          if msg:
6480
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6481
                            " continuing anyway", device_idx, node, msg)
6482
        result.append(("disk/%d" % device_idx, "remove"))
6483
      elif disk_op == constants.DDM_ADD:
6484
        # add a new disk
6485
        if instance.disk_template == constants.DT_FILE:
6486
          file_driver, file_path = instance.disks[0].logical_id
6487
          file_path = os.path.dirname(file_path)
6488
        else:
6489
          file_driver = file_path = None
6490
        disk_idx_base = len(instance.disks)
6491
        new_disk = _GenerateDiskTemplate(self,
6492
                                         instance.disk_template,
6493
                                         instance.name, instance.primary_node,
6494
                                         instance.secondary_nodes,
6495
                                         [disk_dict],
6496
                                         file_path,
6497
                                         file_driver,
6498
                                         disk_idx_base)[0]
6499
        instance.disks.append(new_disk)
6500
        info = _GetInstanceInfoText(instance)
6501

    
6502
        logging.info("Creating volume %s for instance %s",
6503
                     new_disk.iv_name, instance.name)
6504
        # Note: this needs to be kept in sync with _CreateDisks
6505
        #HARDCODE
6506
        for node in instance.all_nodes:
6507
          f_create = node == instance.primary_node
6508
          try:
6509
            _CreateBlockDev(self, node, instance, new_disk,
6510
                            f_create, info, f_create)
6511
          except errors.OpExecError, err:
6512
            self.LogWarning("Failed to create volume %s (%s) on"
6513
                            " node %s: %s",
6514
                            new_disk.iv_name, new_disk, node, err)
6515
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6516
                       (new_disk.size, new_disk.mode)))
6517
      else:
6518
        # change a given disk
6519
        instance.disks[disk_op].mode = disk_dict['mode']
6520
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6521
    # NIC changes
6522
    for nic_op, nic_dict in self.op.nics:
6523
      if nic_op == constants.DDM_REMOVE:
6524
        # remove the last nic
6525
        del instance.nics[-1]
6526
        result.append(("nic.%d" % len(instance.nics), "remove"))
6527
      elif nic_op == constants.DDM_ADD:
6528
        # mac and bridge should be set by now
6529
        mac = nic_dict['mac']
6530
        ip = nic_dict.get('ip', None)
6531
        nicparams = self.nic_pinst[constants.DDM_ADD]
6532
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6533
        instance.nics.append(new_nic)
6534
        result.append(("nic.%d" % (len(instance.nics) - 1),
6535
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6536
                       (new_nic.mac, new_nic.ip,
6537
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6538
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6539
                       )))
6540
      else:
6541
        for key in 'mac', 'ip':
6542
          if key in nic_dict:
6543
            setattr(instance.nics[nic_op], key, nic_dict[key])
6544
        if nic_op in self.nic_pnew:
6545
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6546
        for key, val in nic_dict.iteritems():
6547
          result.append(("nic.%s/%d" % (key, nic_op), val))
6548

    
6549
    # hvparams changes
6550
    if self.op.hvparams:
6551
      instance.hvparams = self.hv_inst
6552
      for key, val in self.op.hvparams.iteritems():
6553
        result.append(("hv/%s" % key, val))
6554

    
6555
    # beparams changes
6556
    if self.op.beparams:
6557
      instance.beparams = self.be_inst
6558
      for key, val in self.op.beparams.iteritems():
6559
        result.append(("be/%s" % key, val))
6560

    
6561
    self.cfg.Update(instance)
6562

    
6563
    return result
6564

    
6565

    
6566
class LUQueryExports(NoHooksLU):
6567
  """Query the exports list
6568

6569
  """
6570
  _OP_REQP = ['nodes']
6571
  REQ_BGL = False
6572

    
6573
  def ExpandNames(self):
6574
    self.needed_locks = {}
6575
    self.share_locks[locking.LEVEL_NODE] = 1
6576
    if not self.op.nodes:
6577
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6578
    else:
6579
      self.needed_locks[locking.LEVEL_NODE] = \
6580
        _GetWantedNodes(self, self.op.nodes)
6581

    
6582
  def CheckPrereq(self):
6583
    """Check prerequisites.
6584

6585
    """
6586
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6587

    
6588
  def Exec(self, feedback_fn):
6589
    """Compute the list of all the exported system images.
6590

6591
    @rtype: dict
6592
    @return: a dictionary with the structure node->(export-list)
6593
        where export-list is a list of the instances exported on
6594
        that node.
6595

6596
    """
6597
    rpcresult = self.rpc.call_export_list(self.nodes)
6598
    result = {}
6599
    for node in rpcresult:
6600
      if rpcresult[node].fail_msg:
6601
        result[node] = False
6602
      else:
6603
        result[node] = rpcresult[node].payload
6604

    
6605
    return result
6606

    
6607

    
6608
class LUExportInstance(LogicalUnit):
6609
  """Export an instance to an image in the cluster.
6610

6611
  """
6612
  HPATH = "instance-export"
6613
  HTYPE = constants.HTYPE_INSTANCE
6614
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6615
  REQ_BGL = False
6616

    
6617
  def ExpandNames(self):
6618
    self._ExpandAndLockInstance()
6619
    # FIXME: lock only instance primary and destination node
6620
    #
6621
    # Sad but true, for now we have to lock all nodes, as we don't know where
6622
    # the previous export might be, and in this LU we search for it and
6623
    # remove it from its current node. In the future we could fix this by:
6624
    #  - making a tasklet to search (share-lock all), then create the new one,
6625
    #    then one to remove, after
6626
    #  - removing the removal operation altogether
6627
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6628

    
6629
  def DeclareLocks(self, level):
6630
    """Last minute lock declaration."""
6631
    # All nodes are locked anyway, so nothing to do here.
6632

    
6633
  def BuildHooksEnv(self):
6634
    """Build hooks env.
6635

6636
    This will run on the master, primary node and target node.
6637

6638
    """
6639
    env = {
6640
      "EXPORT_NODE": self.op.target_node,
6641
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6642
      }
6643
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6644
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6645
          self.op.target_node]
6646
    return env, nl, nl
6647

    
6648
  def CheckPrereq(self):
6649
    """Check prerequisites.
6650

6651
    This checks that the instance and node names are valid.
6652

6653
    """
6654
    instance_name = self.op.instance_name
6655
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6656
    assert self.instance is not None, \
6657
          "Cannot retrieve locked instance %s" % self.op.instance_name
6658
    _CheckNodeOnline(self, self.instance.primary_node)
6659

    
6660
    self.dst_node = self.cfg.GetNodeInfo(
6661
      self.cfg.ExpandNodeName(self.op.target_node))
6662

    
6663
    if self.dst_node is None:
6664
      # This is a wrong node name, not a non-locked node
6665
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6666
    _CheckNodeOnline(self, self.dst_node.name)
6667
    _CheckNodeNotDrained(self, self.dst_node.name)
6668

    
6669
    # instance disk type verification
6670
    for disk in self.instance.disks:
6671
      if disk.dev_type == constants.LD_FILE:
6672
        raise errors.OpPrereqError("Export not supported for instances with"
6673
                                   " file-based disks")
6674

    
6675
  def Exec(self, feedback_fn):
6676
    """Export an instance to an image in the cluster.
6677

6678
    """
6679
    instance = self.instance
6680
    dst_node = self.dst_node
6681
    src_node = instance.primary_node
6682
    if self.op.shutdown:
6683
      # shutdown the instance, but not the disks
6684
      result = self.rpc.call_instance_shutdown(src_node, instance)
6685
      result.Raise("Could not shutdown instance %s on"
6686
                   " node %s" % (instance.name, src_node))
6687

    
6688
    vgname = self.cfg.GetVGName()
6689

    
6690
    snap_disks = []
6691

    
6692
    # set the disks ID correctly since call_instance_start needs the
6693
    # correct drbd minor to create the symlinks
6694
    for disk in instance.disks:
6695
      self.cfg.SetDiskID(disk, src_node)
6696

    
6697
    try:
6698
      for idx, disk in enumerate(instance.disks):
6699
        # result.payload will be a snapshot of an lvm leaf of the one we passed
6700
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
6701
        msg = result.fail_msg
6702
        if msg:
6703
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
6704
                          idx, src_node, msg)
6705
          snap_disks.append(False)
6706
        else:
6707
          disk_id = (vgname, result.payload)
6708
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6709
                                 logical_id=disk_id, physical_id=disk_id,
6710
                                 iv_name=disk.iv_name)
6711
          snap_disks.append(new_dev)
6712

    
6713
    finally:
6714
      if self.op.shutdown and instance.admin_up:
6715
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6716
        msg = result.fail_msg
6717
        if msg:
6718
          _ShutdownInstanceDisks(self, instance)
6719
          raise errors.OpExecError("Could not start instance: %s" % msg)
6720

    
6721
    # TODO: check for size
6722

    
6723
    cluster_name = self.cfg.GetClusterName()
6724
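    # export each snapshot to the destination node and then remove the
    # temporary snapshot LV from the source node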
    for idx, dev in enumerate(snap_disks):
6725
      if dev:
6726
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6727
                                               instance, cluster_name, idx)
6728
        msg = result.fail_msg
6729
        if msg:
6730
          self.LogWarning("Could not export disk/%s from node %s to"
6731
                          " node %s: %s", idx, src_node, dst_node.name, msg)
6732
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
6733
        if msg:
6734
          self.LogWarning("Could not remove snapshot for disk/%d from node"
6735
                          " %s: %s", idx, src_node, msg)
6736

    
6737
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6738
    msg = result.fail_msg
6739
    if msg:
6740
      self.LogWarning("Could not finalize export for instance %s"
6741
                      " on node %s: %s", instance.name, dst_node.name, msg)
6742

    
6743
    nodelist = self.cfg.GetNodeList()
6744
    nodelist.remove(dst_node.name)
6745

    
6746
    # on one-node clusters nodelist will be empty after the removal
6747
    # if we proceed the backup would be removed because OpQueryExports
6748
    # substitutes an empty list with the full cluster node list.
6749
    iname = instance.name
6750
    if nodelist:
6751
      exportlist = self.rpc.call_export_list(nodelist)
6752
      for node in exportlist:
6753
        if exportlist[node].fail_msg:
6754
          continue
6755
        if iname in exportlist[node].payload:
6756
          msg = self.rpc.call_export_remove(node, iname).fail_msg
6757
          if msg:
6758
            self.LogWarning("Could not remove older export for instance %s"
6759
                            " on node %s: %s", iname, node, msg)
6760

    
6761

    
6762
class LURemoveExport(NoHooksLU):
6763
  """Remove exports related to the named instance.
6764

6765
  """
6766
  _OP_REQP = ["instance_name"]
6767
  REQ_BGL = False
6768

    
6769
  def ExpandNames(self):
6770
    self.needed_locks = {}
6771
    # We need all nodes to be locked in order for RemoveExport to work, but we
6772
    # don't need to lock the instance itself, as nothing will happen to it (and
6773
    # we can remove exports also for a removed instance)
6774
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6775

    
6776
  def CheckPrereq(self):
6777
    """Check prerequisites.
6778
    """
6779
    pass
6780

    
6781
  def Exec(self, feedback_fn):
6782
    """Remove any export.
6783

6784
    """
6785
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6786
    # If the instance was not found we'll try with the name that was passed in.
6787
    # This will only work if it was an FQDN, though.
6788
    fqdn_warn = False
6789
    if not instance_name:
6790
      fqdn_warn = True
6791
      instance_name = self.op.instance_name
6792

    
6793
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6794
    exportlist = self.rpc.call_export_list(locked_nodes)
6795
    found = False
6796
    for node in exportlist:
6797
      msg = exportlist[node].fail_msg
6798
      if msg:
6799
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
6800
        continue
6801
      if instance_name in exportlist[node].payload:
6802
        found = True
6803
        result = self.rpc.call_export_remove(node, instance_name)
6804
        msg = result.fail_msg
6805
        if msg:
6806
          logging.error("Could not remove export for instance %s"
6807
                        " on node %s: %s", instance_name, node, msg)
6808

    
6809
    if fqdn_warn and not found:
6810
      feedback_fn("Export not found. If trying to remove an export belonging"
6811
                  " to a deleted instance please use its Fully Qualified"
6812
                  " Domain Name.")
6813

    
6814

    
6815
class TagsLU(NoHooksLU):
6816
  """Generic tags LU.
6817

6818
  This is an abstract class which is the parent of all the other tags LUs.
6819

6820
  """
6821

    
6822
  def ExpandNames(self):
6823
    self.needed_locks = {}
6824
    if self.op.kind == constants.TAG_NODE:
6825
      name = self.cfg.ExpandNodeName(self.op.name)
6826
      if name is None:
6827
        raise errors.OpPrereqError("Invalid node name (%s)" %
6828
                                   (self.op.name,))
6829
      self.op.name = name
6830
      self.needed_locks[locking.LEVEL_NODE] = name
6831
    elif self.op.kind == constants.TAG_INSTANCE:
6832
      name = self.cfg.ExpandInstanceName(self.op.name)
6833
      if name is None:
6834
        raise errors.OpPrereqError("Invalid instance name (%s)" %
6835
                                   (self.op.name,))
6836
      self.op.name = name
6837
      self.needed_locks[locking.LEVEL_INSTANCE] = name
6838

    
6839
  def CheckPrereq(self):
6840
    """Check prerequisites.
6841

6842
    """
6843
    if self.op.kind == constants.TAG_CLUSTER:
6844
      self.target = self.cfg.GetClusterInfo()
6845
    elif self.op.kind == constants.TAG_NODE:
6846
      self.target = self.cfg.GetNodeInfo(self.op.name)
6847
    elif self.op.kind == constants.TAG_INSTANCE:
6848
      self.target = self.cfg.GetInstanceInfo(self.op.name)
6849
    else:
6850
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
6851
                                 str(self.op.kind))
6852

    
6853

    
6854
class LUGetTags(TagsLU):
6855
  """Returns the tags of a given object.
6856

6857
  """
6858
  _OP_REQP = ["kind", "name"]
6859
  REQ_BGL = False
6860

    
6861
  def Exec(self, feedback_fn):
6862
    """Returns the tag list.
6863

6864
    """
6865
    return list(self.target.GetTags())
6866

    
6867

    
6868
class LUSearchTags(NoHooksLU):
6869
  """Searches the tags for a given pattern.
6870

6871
  """
6872
  _OP_REQP = ["pattern"]
6873
  REQ_BGL = False
6874

    
6875
  def ExpandNames(self):
6876
    self.needed_locks = {}
6877

    
6878
  def CheckPrereq(self):
6879
    """Check prerequisites.
6880

6881
    This checks the pattern passed for validity by compiling it.
6882

6883
    """
6884
    try:
6885
      self.re = re.compile(self.op.pattern)
6886
    except re.error, err:
6887
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6888
                                 (self.op.pattern, err))
6889

    
6890
  def Exec(self, feedback_fn):
6891
    """Returns the tag list.
6892

6893
    """
6894
    cfg = self.cfg
6895
    tgts = [("/cluster", cfg.GetClusterInfo())]
6896
    ilist = cfg.GetAllInstancesInfo().values()
6897
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6898
    nlist = cfg.GetAllNodesInfo().values()
6899
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6900
    results = []
6901
    for path, target in tgts:
6902
      for tag in target.GetTags():
6903
        if self.re.search(tag):
6904
          results.append((path, tag))
6905
    return results
6906

    
6907

    
6908
class LUAddTags(TagsLU):
6909
  """Sets a tag on a given object.
6910

6911
  """
6912
  _OP_REQP = ["kind", "name", "tags"]
6913
  REQ_BGL = False
6914

    
6915
  def CheckPrereq(self):
6916
    """Check prerequisites.
6917

6918
    This checks the type and length of the tag name and value.
6919

6920
    """
6921
    TagsLU.CheckPrereq(self)
6922
    for tag in self.op.tags:
6923
      objects.TaggableObject.ValidateTag(tag)
6924

    
6925
  def Exec(self, feedback_fn):
6926
    """Sets the tag.
6927

6928
    """
6929
    try:
6930
      for tag in self.op.tags:
6931
        self.target.AddTag(tag)
6932
    except errors.TagError, err:
6933
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
6934
    try:
6935
      self.cfg.Update(self.target)
6936
    except errors.ConfigurationError:
6937
      raise errors.OpRetryError("There has been a modification to the"
6938
                                " config file and the operation has been"
6939
                                " aborted. Please retry.")
6940

    
6941

    
6942
class LUDelTags(TagsLU):
6943
  """Delete a list of tags from a given object.
6944

6945
  """
6946
  _OP_REQP = ["kind", "name", "tags"]
6947
  REQ_BGL = False
6948

    
6949
  def CheckPrereq(self):
6950
    """Check prerequisites.
6951

6952
    This checks that we have the given tag.
6953

6954
    """
6955
    TagsLU.CheckPrereq(self)
6956
    for tag in self.op.tags:
6957
      objects.TaggableObject.ValidateTag(tag)
6958
    del_tags = frozenset(self.op.tags)
6959
    cur_tags = self.target.GetTags()
6960
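    # subset check: every tag requested for deletion must currently be
    # present on the target object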
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the mode-specific _ALLO_KEYS or
      _RELO_KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
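  # Illustrative usage sketch (the instance and allocator names are
  # hypothetical; the call pattern mirrors LUTestAllocator.Exec below):
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("my-allocator")  # name of an installed iallocator script
  #   if not ial.success:
  #     raise errors.OpExecError("iallocator failure: %s" % ial.info)
  #   new_nodes = ial.nodes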
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
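    # Sketch of the resulting self.in_data layout (values illustrative):
    #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
    #    "enabled_hypervisors": [...],
    #    "nodes": {node_name: {"tags": [...], "total_memory": ..., ...}},
    #    "instances": {inst_name: {"vcpus": ..., "disks": [...], ...}}}
    # The mode-specific "request" key is added later by _AddNewInstance
    # or _AddRelocateInstance.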
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

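    # Network-mirrored disk templates (e.g. drbd) need both a primary and
    # a secondary node; all other templates need a single node.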
    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

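    # "relocate_from" lists the node(s) the instance should be moved away
    # from; for a DRBD instance this is its current secondary node.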
    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
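    # The allocator script runs on the master node via RPC; only the
    # serialized input text is shipped to it.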
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
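    # A well-formed reply is a dict along these (illustrative) lines:
    #   {"success": True, "info": "allocation successful",
    #    "nodes": ["node1.example.com", "node2.example.com"]}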
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

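    # Direction "in" only returns the generated allocator input; "out"
    # runs the named allocator script and returns its raw, unvalidated
    # output.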
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result