
lib/cmdlib.py @ 3e06e001


#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = []

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets:
      for tl in self.tasklets:
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets:
      for tl in self.tasklets:
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


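# The following is an illustrative sketch and not part of the original
# module: a minimal logical unit written against the contract documented in
# LogicalUnit above. The opcode parameter "node_name" and the class name are
# hypothetical and only show how _OP_REQP, ExpandNames, CheckPrereq and Exec
# fit together for a concurrent (REQ_BGL=False) LU.
#
#   class LUExampleNoop(NoHooksLU):
#     """Example LU which expands and locks one node, then does nothing."""
#     _OP_REQP = ["node_name"]
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       # canonicalize the (hypothetical) node_name parameter and lock it
#       full_name = self.cfg.ExpandNodeName(self.op.node_name)
#       if full_name is None:
#         raise errors.OpPrereqError("No such node name '%s'" %
#                                    self.op.node_name)
#       self.op.node_name = full_name
#       self.needed_locks = {locking.LEVEL_NODE: [full_name]}
#
#     def CheckPrereq(self):
#       # idempotent checks only; refuse offline nodes
#       _CheckNodeOnline(self, self.op.node_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do for %s" % self.op.node_name)

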
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


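# Illustrative sketch, not part of the original module: how an LU could be
# assembled from a tasklet, as described by the Tasklet class and the
# ExpandNames docstring above. TLExampleCheck and the instance_name opcode
# parameter are hypothetical names used only for this example.
#
#   class TLExampleCheck(Tasklet):
#     def __init__(self, lu, instance_name):
#       self.lu = lu
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       if self.lu.cfg.GetInstanceInfo(self.instance_name) is None:
#         raise errors.OpPrereqError("Instance '%s' not known" %
#                                    self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Checked instance %s" % self.instance_name)
#
#   # and, inside the owning LU, typically at the end of ExpandNames:
#   #   self.tasklets = [TLExampleCheck(self, self.op.instance_name)]

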
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


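# Illustrative usage sketch, not part of the original module: a query-type LU
# would typically validate its requested output fields in CheckPrereq along
# these lines; the field names shown are hypothetical.
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dfree", "mfree"),
#                      selected=self.op.output_fields)

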
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env

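# Illustrative sketch, not part of the original module: for a hypothetical
# single-NIC, single-disk DRBD instance the function above would return keys
# along these lines (the GANETI_ prefix is added later by the hooks runner):
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_SECONDARIES": "node2.example.com",
#     "INSTANCE_OS_TYPE": "debootstrap",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_MEMORY": 128,
#     "INSTANCE_VCPUS": 1,
#     "INSTANCE_DISK_TEMPLATE": "drbd",
#     "INSTANCE_HYPERVISOR": "xen-pvm",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_IP": "",
#     "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#     "INSTANCE_NIC0_MODE": "bridged",
#     "INSTANCE_NIC0_LINK": "xen-br0",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     "INSTANCE_DISK0_MODE": "rw",
#     # ...plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend and
#     # hypervisor parameter
#   }
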
def _NICListToTuple(lu, nics):
601
  """Build a list of nic information tuples.
602

603
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
604
  value in LUQueryInstanceData.
605

606
  @type lu:  L{LogicalUnit}
607
  @param lu: the logical unit on whose behalf we execute
608
  @type nics: list of L{objects.NIC}
609
  @param nics: list of nics to convert to hooks tuples
610

611
  """
612
  hooks_nics = []
613
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
614
  for nic in nics:
615
    ip = nic.ip
616
    mac = nic.mac
617
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
618
    mode = filled_params[constants.NIC_MODE]
619
    link = filled_params[constants.NIC_LINK]
620
    hooks_nics.append((ip, mac, mode, link))
621
  return hooks_nics
622

    
623
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
624
  """Builds instance related env variables for hooks from an object.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type instance: L{objects.Instance}
629
  @param instance: the instance for which we should build the
630
      environment
631
  @type override: dict
632
  @param override: dictionary with key/values that will override
633
      our values
634
  @rtype: dict
635
  @return: the hook environment dictionary
636

637
  """
638
  cluster = lu.cfg.GetClusterInfo()
639
  bep = cluster.FillBE(instance)
640
  hvp = cluster.FillHV(instance)
641
  args = {
642
    'name': instance.name,
643
    'primary_node': instance.primary_node,
644
    'secondary_nodes': instance.secondary_nodes,
645
    'os_type': instance.os,
646
    'status': instance.admin_up,
647
    'memory': bep[constants.BE_MEMORY],
648
    'vcpus': bep[constants.BE_VCPUS],
649
    'nics': _NICListToTuple(lu, instance.nics),
650
    'disk_template': instance.disk_template,
651
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
652
    'bep': bep,
653
    'hvp': hvp,
654
    'hypervisor_name': instance.hypervisor,
655
  }
656
  if override:
657
    args.update(override)
658
  return _BuildInstanceHookEnv(**args)
659

    
660

    
661
def _AdjustCandidatePool(lu):
662
  """Adjust the candidate pool after node operations.
663

664
  """
665
  mod_list = lu.cfg.MaintainCandidatePool()
666
  if mod_list:
667
    lu.LogInfo("Promoted nodes to master candidate role: %s",
668
               ", ".join(node.name for node in mod_list))
669
    for name in mod_list:
670
      lu.context.ReaddNode(name)
671
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
672
  if mc_now > mc_max:
673
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
674
               (mc_now, mc_max))
675

    
676

    
677
def _CheckNicsBridgesExist(lu, target_nics, target_node,
678
                               profile=constants.PP_DEFAULT):
679
  """Check that the brigdes needed by a list of nics exist.
680

681
  """
682
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
683
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
684
                for nic in target_nics]
685
  brlist = [params[constants.NIC_LINK] for params in paramslist
686
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
687
  if brlist:
688
    result = lu.rpc.call_bridges_exist(target_node, brlist)
689
    result.Raise("Error checking bridges on destination node '%s'" %
690
                 target_node, prereq=True)
691

    
692

    
693
def _CheckInstanceBridgesExist(lu, instance, node=None):
694
  """Check that the brigdes needed by an instance exist.
695

696
  """
697
  if node is None:
698
    node = instance.primary_node
699
  _CheckNicsBridgesExist(lu, instance.nics, node)
700

    
701

    
702
def _GetNodeSecondaryInstances(cfg, node_name):
703
  """Returns secondary instances on a node.
704

705
  """
706
  instances = []
707

    
708
  for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
709
    if node_name in inst.secondary_nodes:
710
      instances.append(inst)
711

    
712
  return instances
713

    
714

    
715
class LUDestroyCluster(NoHooksLU):
716
  """Logical unit for destroying the cluster.
717

718
  """
719
  _OP_REQP = []
720

    
721
  def CheckPrereq(self):
722
    """Check prerequisites.
723

724
    This checks whether the cluster is empty.
725

726
    Any errors are signaled by raising errors.OpPrereqError.
727

728
    """
729
    master = self.cfg.GetMasterNode()
730

    
731
    nodelist = self.cfg.GetNodeList()
732
    if len(nodelist) != 1 or nodelist[0] != master:
733
      raise errors.OpPrereqError("There are still %d node(s) in"
734
                                 " this cluster." % (len(nodelist) - 1))
735
    instancelist = self.cfg.GetInstanceList()
736
    if instancelist:
737
      raise errors.OpPrereqError("There are still %d instance(s) in"
738
                                 " this cluster." % len(instancelist))
739

    
740
  def Exec(self, feedback_fn):
741
    """Destroys the cluster.
742

743
    """
744
    master = self.cfg.GetMasterNode()
745
    result = self.rpc.call_node_stop_master(master, False)
746
    result.Raise("Could not disable the master role")
747
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
748
    utils.CreateBackup(priv_key)
749
    utils.CreateBackup(pub_key)
750
    return master
751

    
752

    
753
class LUVerifyCluster(LogicalUnit):
754
  """Verifies the cluster status.
755

756
  """
757
  HPATH = "cluster-verify"
758
  HTYPE = constants.HTYPE_CLUSTER
759
  _OP_REQP = ["skip_checks"]
760
  REQ_BGL = False
761

    
762
  def ExpandNames(self):
763
    self.needed_locks = {
764
      locking.LEVEL_NODE: locking.ALL_SET,
765
      locking.LEVEL_INSTANCE: locking.ALL_SET,
766
    }
767
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
768

    
769
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
770
                  node_result, feedback_fn, master_files,
771
                  drbd_map, vg_name):
772
    """Run multiple tests against a node.
773

774
    Test list:
775

776
      - compares ganeti version
777
      - checks vg existence and size > 20G
778
      - checks config file checksum
779
      - checks ssh to other nodes
780

781
    @type nodeinfo: L{objects.Node}
782
    @param nodeinfo: the node to check
783
    @param file_list: required list of files
784
    @param local_cksum: dictionary of local files and their checksums
785
    @param node_result: the results from the node
786
    @param feedback_fn: function used to accumulate results
787
    @param master_files: list of files that only masters should have
788
    @param drbd_map: the useddrbd minors for this node, in
789
        form of minor: (instance, must_exist) which correspond to instances
790
        and their running status
791
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
792

793
    """
794
    node = nodeinfo.name
795

    
796
    # main result, node_result should be a non-empty dict
797
    if not node_result or not isinstance(node_result, dict):
798
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
799
      return True
800

    
801
    # compares ganeti version
802
    local_version = constants.PROTOCOL_VERSION
803
    remote_version = node_result.get('version', None)
804
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
805
            len(remote_version) == 2):
806
      feedback_fn("  - ERROR: connection to %s failed" % (node))
807
      return True
808

    
809
    if local_version != remote_version[0]:
810
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
811
                  " node %s %s" % (local_version, node, remote_version[0]))
812
      return True
813

    
814
    # node seems compatible, we can actually try to look into its results
815

    
816
    bad = False
817

    
818
    # full package version
819
    if constants.RELEASE_VERSION != remote_version[1]:
820
      feedback_fn("  - WARNING: software version mismatch: master %s,"
821
                  " node %s %s" %
822
                  (constants.RELEASE_VERSION, node, remote_version[1]))
823

    
824
    # checks vg existence and size > 20G
825
    if vg_name is not None:
826
      vglist = node_result.get(constants.NV_VGLIST, None)
827
      if not vglist:
828
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
829
                        (node,))
830
        bad = True
831
      else:
832
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
833
                                              constants.MIN_VG_SIZE)
834
        if vgstatus:
835
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
836
          bad = True
837

    
838
    # checks config file checksum
839

    
840
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
841
    if not isinstance(remote_cksum, dict):
842
      bad = True
843
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
844
    else:
845
      for file_name in file_list:
846
        node_is_mc = nodeinfo.master_candidate
847
        must_have_file = file_name not in master_files
848
        if file_name not in remote_cksum:
849
          if node_is_mc or must_have_file:
850
            bad = True
851
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
852
        elif remote_cksum[file_name] != local_cksum[file_name]:
853
          if node_is_mc or must_have_file:
854
            bad = True
855
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
856
          else:
857
            # not candidate and this is not a must-have file
858
            bad = True
859
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
860
                        " candidates (and the file is outdated)" % file_name)
861
        else:
862
          # all good, except non-master/non-must have combination
863
          if not node_is_mc and not must_have_file:
864
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
865
                        " candidates" % file_name)
866

    
867
    # checks ssh to any
868

    
869
    if constants.NV_NODELIST not in node_result:
870
      bad = True
871
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
872
    else:
873
      if node_result[constants.NV_NODELIST]:
874
        bad = True
875
        for node in node_result[constants.NV_NODELIST]:
876
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
877
                          (node, node_result[constants.NV_NODELIST][node]))
878

    
879
    if constants.NV_NODENETTEST not in node_result:
880
      bad = True
881
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
882
    else:
883
      if node_result[constants.NV_NODENETTEST]:
884
        bad = True
885
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
886
        for node in nlist:
887
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
888
                          (node, node_result[constants.NV_NODENETTEST][node]))
889

    
890
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
891
    if isinstance(hyp_result, dict):
892
      for hv_name, hv_result in hyp_result.iteritems():
893
        if hv_result is not None:
894
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
895
                      (hv_name, hv_result))
896

    
897
    # check used drbd list
898
    if vg_name is not None:
899
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
900
      if not isinstance(used_minors, (tuple, list)):
901
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
902
                    str(used_minors))
903
      else:
904
        for minor, (iname, must_exist) in drbd_map.items():
905
          if minor not in used_minors and must_exist:
906
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
907
                        " not active" % (minor, iname))
908
            bad = True
909
        for minor in used_minors:
910
          if minor not in drbd_map:
911
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
912
                        minor)
913
            bad = True
914

    
915
    return bad
916

    
917
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
918
                      node_instance, feedback_fn, n_offline):
919
    """Verify an instance.
920

921
    This function checks to see if the required block devices are
922
    available on the instance's node.
923

924
    """
925
    bad = False
926

    
927
    node_current = instanceconfig.primary_node
928

    
929
    node_vol_should = {}
930
    instanceconfig.MapLVsByNode(node_vol_should)
931

    
932
    for node in node_vol_should:
933
      if node in n_offline:
934
        # ignore missing volumes on offline nodes
935
        continue
936
      for volume in node_vol_should[node]:
937
        if node not in node_vol_is or volume not in node_vol_is[node]:
938
          feedback_fn("  - ERROR: volume %s missing on node %s" %
939
                          (volume, node))
940
          bad = True
941

    
942
    if instanceconfig.admin_up:
943
      if ((node_current not in node_instance or
944
          not instance in node_instance[node_current]) and
945
          node_current not in n_offline):
946
        feedback_fn("  - ERROR: instance %s not running on node %s" %
947
                        (instance, node_current))
948
        bad = True
949

    
950
    for node in node_instance:
951
      if (not node == node_current):
952
        if instance in node_instance[node]:
953
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
954
                          (instance, node))
955
          bad = True
956

    
957
    return bad
958

    
959
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
960
    """Verify if there are any unknown volumes in the cluster.
961

962
    The .os, .swap and backup volumes are ignored. All other volumes are
963
    reported as unknown.
964

965
    """
966
    bad = False
967

    
968
    for node in node_vol_is:
969
      for volume in node_vol_is[node]:
970
        if node not in node_vol_should or volume not in node_vol_should[node]:
971
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
972
                      (volume, node))
973
          bad = True
974
    return bad
975

    
976
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
977
    """Verify the list of running instances.
978

979
    This checks what instances are running but unknown to the cluster.
980

981
    """
982
    bad = False
983
    for node in node_instance:
984
      for runninginstance in node_instance[node]:
985
        if runninginstance not in instancelist:
986
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
987
                          (runninginstance, node))
988
          bad = True
989
    return bad
990

    
991
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
992
    """Verify N+1 Memory Resilience.
993

994
    Check that if one single node dies we can still start all the instances it
995
    was primary for.
996

997
    """
998
    bad = False
999

    
1000
    for node, nodeinfo in node_info.iteritems():
1001
      # This code checks that every node which is now listed as secondary has
1002
      # enough memory to host all instances it is supposed to should a single
1003
      # other node in the cluster fail.
1004
      # FIXME: not ready for failover to an arbitrary node
1005
      # FIXME: does not support file-backed instances
1006
      # WARNING: we currently take into account down instances as well as up
1007
      # ones, considering that even if they're down someone might want to start
1008
      # them even in the event of a node failure.
1009
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1010
        needed_mem = 0
1011
        for instance in instances:
1012
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1013
          if bep[constants.BE_AUTO_BALANCE]:
1014
            needed_mem += bep[constants.BE_MEMORY]
1015
        if nodeinfo['mfree'] < needed_mem:
1016
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
1017
                      " failovers should node %s fail" % (node, prinode))
1018
          bad = True
1019
    return bad
1020

    
1021
  def CheckPrereq(self):
1022
    """Check prerequisites.
1023

1024
    Transform the list of checks we're going to skip into a set and check that
1025
    all its members are valid.
1026

1027
    """
1028
    self.skip_set = frozenset(self.op.skip_checks)
1029
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1030
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1031

    
1032
  def BuildHooksEnv(self):
1033
    """Build hooks env.
1034

1035
    Cluster-Verify hooks just ran in the post phase and their failure makes
1036
    the output be logged in the verify output and the verification to fail.
1037

1038
    """
1039
    all_nodes = self.cfg.GetNodeList()
1040
    env = {
1041
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1042
      }
1043
    for node in self.cfg.GetAllNodesInfo().values():
1044
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1045

    
1046
    return env, [], all_nodes
1047

    
1048
  def Exec(self, feedback_fn):
1049
    """Verify integrity of cluster, performing various test on nodes.
1050

1051
    """
1052
    bad = False
1053
    feedback_fn("* Verifying global settings")
1054
    for msg in self.cfg.VerifyConfig():
1055
      feedback_fn("  - ERROR: %s" % msg)
1056

    
1057
    vg_name = self.cfg.GetVGName()
1058
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1059
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1060
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1061
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1062
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1063
                        for iname in instancelist)
1064
    i_non_redundant = [] # Non redundant instances
1065
    i_non_a_balanced = [] # Non auto-balanced instances
1066
    n_offline = [] # List of offline nodes
1067
    n_drained = [] # List of nodes being drained
1068
    node_volume = {}
1069
    node_instance = {}
1070
    node_info = {}
1071
    instance_cfg = {}
1072

    
1073
    # FIXME: verify OS list
1074
    # do local checksums
1075
    master_files = [constants.CLUSTER_CONF_FILE]
1076

    
1077
    file_names = ssconf.SimpleStore().GetFileList()
1078
    file_names.append(constants.SSL_CERT_FILE)
1079
    file_names.append(constants.RAPI_CERT_FILE)
1080
    file_names.extend(master_files)
1081

    
1082
    local_checksums = utils.FingerprintFiles(file_names)
1083

    
1084
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1085
    node_verify_param = {
1086
      constants.NV_FILELIST: file_names,
1087
      constants.NV_NODELIST: [node.name for node in nodeinfo
1088
                              if not node.offline],
1089
      constants.NV_HYPERVISOR: hypervisors,
1090
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1091
                                  node.secondary_ip) for node in nodeinfo
1092
                                 if not node.offline],
1093
      constants.NV_INSTANCELIST: hypervisors,
1094
      constants.NV_VERSION: None,
1095
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1096
      }
1097
    if vg_name is not None:
1098
      node_verify_param[constants.NV_VGLIST] = None
1099
      node_verify_param[constants.NV_LVLIST] = vg_name
1100
      node_verify_param[constants.NV_DRBDLIST] = None
1101
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1102
                                           self.cfg.GetClusterName())
1103

    
1104
    cluster = self.cfg.GetClusterInfo()
1105
    master_node = self.cfg.GetMasterNode()
1106
    all_drbd_map = self.cfg.ComputeDRBDMap()
1107

    
1108
    for node_i in nodeinfo:
1109
      node = node_i.name
1110

    
1111
      if node_i.offline:
1112
        feedback_fn("* Skipping offline node %s" % (node,))
1113
        n_offline.append(node)
1114
        continue
1115

    
1116
      if node == master_node:
1117
        ntype = "master"
1118
      elif node_i.master_candidate:
1119
        ntype = "master candidate"
1120
      elif node_i.drained:
1121
        ntype = "drained"
1122
        n_drained.append(node)
1123
      else:
1124
        ntype = "regular"
1125
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1126

    
1127
      msg = all_nvinfo[node].fail_msg
1128
      if msg:
1129
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1130
        bad = True
1131
        continue
1132

    
1133
      nresult = all_nvinfo[node].payload
1134
      node_drbd = {}
1135
      for minor, instance in all_drbd_map[node].items():
1136
        if instance not in instanceinfo:
1137
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1138
                      instance)
1139
          # ghost instance should not be running, but otherwise we
1140
          # don't give double warnings (both ghost instance and
1141
          # unallocated minor in use)
1142
          node_drbd[minor] = (instance, False)
1143
        else:
1144
          instance = instanceinfo[instance]
1145
          node_drbd[minor] = (instance.name, instance.admin_up)
1146
      result = self._VerifyNode(node_i, file_names, local_checksums,
1147
                                nresult, feedback_fn, master_files,
1148
                                node_drbd, vg_name)
1149
      bad = bad or result
1150

    
1151
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1152
      if vg_name is None:
1153
        node_volume[node] = {}
1154
      elif isinstance(lvdata, basestring):
1155
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1156
                    (node, utils.SafeEncode(lvdata)))
1157
        bad = True
1158
        node_volume[node] = {}
1159
      elif not isinstance(lvdata, dict):
1160
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1161
        bad = True
1162
        continue
1163
      else:
1164
        node_volume[node] = lvdata
1165

    
1166
      # node_instance
1167
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1168
      if not isinstance(idata, list):
1169
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1170
                    (node,))
1171
        bad = True
1172
        continue
1173

    
1174
      node_instance[node] = idata
1175

    
1176
      # node_info
1177
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1178
      if not isinstance(nodeinfo, dict):
1179
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1180
        bad = True
1181
        continue
1182

    
1183
      try:
1184
        node_info[node] = {
1185
          "mfree": int(nodeinfo['memory_free']),
1186
          "pinst": [],
1187
          "sinst": [],
1188
          # dictionary holding all instances this node is secondary for,
1189
          # grouped by their primary node. Each key is a cluster node, and each
1190
          # value is a list of instances which have the key as primary and the
1191
          # current node as secondary.  this is handy to calculate N+1 memory
1192
          # availability if you can only failover from a primary to its
1193
          # secondary.
1194
          "sinst-by-pnode": {},
1195
        }
1196
        # FIXME: devise a free space model for file based instances as well
1197
        if vg_name is not None:
1198
          if (constants.NV_VGLIST not in nresult or
1199
              vg_name not in nresult[constants.NV_VGLIST]):
1200
            feedback_fn("  - ERROR: node %s didn't return data for the"
1201
                        " volume group '%s' - it is either missing or broken" %
1202
                        (node, vg_name))
1203
            bad = True
1204
            continue
1205
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1206
      except (ValueError, KeyError):
1207
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1208
                    " from node %s" % (node,))
1209
        bad = True
1210
        continue
1211

    
1212
    node_vol_should = {}
1213

    
1214
    for instance in instancelist:
1215
      feedback_fn("* Verifying instance %s" % instance)
1216
      inst_config = instanceinfo[instance]
1217
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1218
                                     node_instance, feedback_fn, n_offline)
1219
      bad = bad or result
1220
      inst_nodes_offline = []
1221

    
1222
      inst_config.MapLVsByNode(node_vol_should)
1223

    
1224
      instance_cfg[instance] = inst_config
1225

    
1226
      pnode = inst_config.primary_node
1227
      if pnode in node_info:
1228
        node_info[pnode]['pinst'].append(instance)
1229
      elif pnode not in n_offline:
1230
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1231
                    " %s failed" % (instance, pnode))
1232
        bad = True
1233

    
1234
      if pnode in n_offline:
1235
        inst_nodes_offline.append(pnode)
1236

    
1237
      # If the instance is non-redundant we cannot survive losing its primary
1238
      # node, so we are not N+1 compliant. On the other hand we have no disk
1239
      # templates with more than one secondary so that situation is not well
1240
      # supported either.
1241
      # FIXME: does not support file-backed instances
1242
      if len(inst_config.secondary_nodes) == 0:
1243
        i_non_redundant.append(instance)
1244
      elif len(inst_config.secondary_nodes) > 1:
1245
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1246
                    % instance)
1247

    
1248
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1249
        i_non_a_balanced.append(instance)
1250

    
1251
      for snode in inst_config.secondary_nodes:
1252
        if snode in node_info:
1253
          node_info[snode]['sinst'].append(instance)
1254
          if pnode not in node_info[snode]['sinst-by-pnode']:
1255
            node_info[snode]['sinst-by-pnode'][pnode] = []
1256
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1257
        elif snode not in n_offline:
1258
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1259
                      " %s failed" % (instance, snode))
1260
          bad = True
1261
        if snode in n_offline:
1262
          inst_nodes_offline.append(snode)
1263

    
1264
      if inst_nodes_offline:
1265
        # warn that the instance lives on offline nodes, and set bad=True
1266
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1267
                    ", ".join(inst_nodes_offline))
1268
        bad = True
1269

    
1270
    feedback_fn("* Verifying orphan volumes")
1271
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1272
                                       feedback_fn)
1273
    bad = bad or result
1274

    
1275
    feedback_fn("* Verifying remaining instances")
1276
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1277
                                         feedback_fn)
1278
    bad = bad or result
1279

    
1280
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1281
      feedback_fn("* Verifying N+1 Memory redundancy")
1282
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1283
      bad = bad or result
1284

    
1285
    feedback_fn("* Other Notes")
1286
    if i_non_redundant:
1287
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1288
                  % len(i_non_redundant))
1289

    
1290
    if i_non_a_balanced:
1291
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1292
                  % len(i_non_a_balanced))
1293

    
1294
    if n_offline:
1295
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1296

    
1297
    if n_drained:
1298
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1299

    
1300
    return not bad
1301

    
1302
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1303
    """Analyze the post-hooks' result
1304

1305
    This method analyses the hook result, handles it, and sends some
1306
    nicely-formatted feedback back to the user.
1307

1308
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1309
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1310
    @param hooks_results: the results of the multi-node hooks rpc call
1311
    @param feedback_fn: function used send feedback back to the caller
1312
    @param lu_result: previous Exec result
1313
    @return: the new Exec result, based on the previous result
1314
        and hook results
1315

1316
    """
1317
    # We only really run POST phase hooks, and are only interested in
1318
    # their results
1319
    if phase == constants.HOOKS_PHASE_POST:
1320
      # Used to change hooks' output to proper indentation
1321
      indent_re = re.compile('^', re.M)
1322
      feedback_fn("* Hooks Results")
1323
      if not hooks_results:
1324
        feedback_fn("  - ERROR: general communication failure")
1325
        lu_result = 1
1326
      else:
1327
        for node_name in hooks_results:
1328
          show_node_header = True
1329
          res = hooks_results[node_name]
1330
          msg = res.fail_msg
1331
          if msg:
1332
            if res.offline:
1333
              # no need to warn or set fail return value
1334
              continue
1335
            feedback_fn("    Communication failure in hooks execution: %s" %
1336
                        msg)
1337
            lu_result = 1
1338
            continue
1339
          for script, hkr, output in res.payload:
1340
            if hkr == constants.HKR_FAIL:
1341
              # The node header is only shown once, if there are
1342
              # failing hooks on that node
1343
              if show_node_header:
1344
                feedback_fn("  Node %s:" % node_name)
1345
                show_node_header = False
1346
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1347
              output = indent_re.sub('      ', output)
1348
              feedback_fn("%s" % output)
1349
              lu_result = 1
1350

    
1351
      return lu_result
1352

    
1353

    
1354
class LUVerifyDisks(NoHooksLU):
1355
  """Verifies the cluster disks status.
1356

1357
  """
1358
  _OP_REQP = []
1359
  REQ_BGL = False
1360

    
1361
  def ExpandNames(self):
1362
    self.needed_locks = {
1363
      locking.LEVEL_NODE: locking.ALL_SET,
1364
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1365
    }
1366
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1367

    
1368
  def CheckPrereq(self):
1369
    """Check prerequisites.
1370

1371
    This has no prerequisites.
1372

1373
    """
1374
    pass
1375

    
1376
  def Exec(self, feedback_fn):
1377
    """Verify integrity of cluster disks.
1378

1379
    @rtype: tuple of three items
1380
    @return: a tuple of (dict of node-to-node_error, list of instances
1381
        which need activate-disks, dict of instance: (node, volume) for
1382
        missing volumes
1383

1384
    """
1385
    result = res_nodes, res_instances, res_missing = {}, [], {}
1386

    
1387
    vg_name = self.cfg.GetVGName()
1388
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1389
    instances = [self.cfg.GetInstanceInfo(name)
1390
                 for name in self.cfg.GetInstanceList()]
1391

    
1392
    nv_dict = {}
1393
    for inst in instances:
1394
      inst_lvs = {}
1395
      if (not inst.admin_up or
1396
          inst.disk_template not in constants.DTS_NET_MIRROR):
1397
        continue
1398
      inst.MapLVsByNode(inst_lvs)
1399
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1400
      for node, vol_list in inst_lvs.iteritems():
1401
        for vol in vol_list:
1402
          nv_dict[(node, vol)] = inst
1403

    
1404
    if not nv_dict:
1405
      return result
1406

    
1407
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1408

    
1409
    for node in nodes:
1410
      # node_volume
1411
      node_res = node_lvs[node]
1412
      if node_res.offline:
1413
        continue
1414
      msg = node_res.fail_msg
1415
      if msg:
1416
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1417
        res_nodes[node] = msg
1418
        continue
1419

    
1420
      lvs = node_res.payload
1421
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1422
        inst = nv_dict.pop((node, lv_name), None)
1423
        if (not lv_online and inst is not None
1424
            and inst.name not in res_instances):
1425
          res_instances.append(inst.name)
1426

    
1427
    # any leftover items in nv_dict are missing LVs, let's arrange the
1428
    # data better
1429
    for key, inst in nv_dict.iteritems():
1430
      if inst.name not in res_missing:
1431
        res_missing[inst.name] = []
1432
      res_missing[inst.name].append(key)
1433

    
1434
    return result
1435

    
1436

    
1437
class LURenameCluster(LogicalUnit):
1438
  """Rename the cluster.
1439

1440
  """
1441
  HPATH = "cluster-rename"
1442
  HTYPE = constants.HTYPE_CLUSTER
1443
  _OP_REQP = ["name"]
1444

    
1445
  def BuildHooksEnv(self):
1446
    """Build hooks env.
1447

1448
    """
1449
    env = {
1450
      "OP_TARGET": self.cfg.GetClusterName(),
1451
      "NEW_NAME": self.op.name,
1452
      }
1453
    mn = self.cfg.GetMasterNode()
1454
    return env, [mn], [mn]
1455

    
1456
  def CheckPrereq(self):
1457
    """Verify that the passed name is a valid one.
1458

1459
    """
1460
    hostname = utils.HostInfo(self.op.name)
1461

    
1462
    new_name = hostname.name
1463
    self.ip = new_ip = hostname.ip
1464
    old_name = self.cfg.GetClusterName()
1465
    old_ip = self.cfg.GetMasterIP()
1466
    if new_name == old_name and new_ip == old_ip:
1467
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1468
                                 " cluster has changed")
1469
    if new_ip != old_ip:
1470
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1471
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1472
                                   " reachable on the network. Aborting." %
1473
                                   new_ip)
1474

    
1475
    self.op.name = new_name
1476

    
1477
  def Exec(self, feedback_fn):
1478
    """Rename the cluster.
1479

1480
    """
1481
    clustername = self.op.name
1482
    ip = self.ip
1483

    
1484
    # shutdown the master IP
1485
    master = self.cfg.GetMasterNode()
1486
    result = self.rpc.call_node_stop_master(master, False)
1487
    result.Raise("Could not disable the master role")
1488

    
1489
    try:
1490
      cluster = self.cfg.GetClusterInfo()
1491
      cluster.cluster_name = clustername
1492
      cluster.master_ip = ip
1493
      self.cfg.Update(cluster)
1494

    
1495
      # update the known hosts file
1496
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1497
      node_list = self.cfg.GetNodeList()
1498
      try:
1499
        node_list.remove(master)
1500
      except ValueError:
1501
        pass
1502
      result = self.rpc.call_upload_file(node_list,
1503
                                         constants.SSH_KNOWN_HOSTS_FILE)
1504
      for to_node, to_result in result.iteritems():
1505
        msg = to_result.fail_msg
1506
        if msg:
1507
          msg = ("Copy of file %s to node %s failed: %s" %
1508
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1509
          self.proc.LogWarning(msg)
1510

    
1511
    finally:
1512
      result = self.rpc.call_node_start_master(master, False, False)
1513
      msg = result.fail_msg
1514
      if msg:
1515
        self.LogWarning("Could not re-enable the master role on"
1516
                        " the master, please restart manually: %s", msg)
1517

    
1518

    
1519
def _RecursiveCheckIfLVMBased(disk):
1520
  """Check if the given disk or its children are lvm-based.
1521

1522
  @type disk: L{objects.Disk}
1523
  @param disk: the disk to check
1524
  @rtype: boolean
1525
  @return: boolean indicating whether a LD_LV dev_type was found or not
1526

1527
  """
1528
  if disk.children:
1529
    for chdisk in disk.children:
1530
      if _RecursiveCheckIfLVMBased(chdisk):
1531
        return True
1532
  return disk.dev_type == constants.LD_LV
1533

    
1534

    
1535
class LUSetClusterParams(LogicalUnit):
1536
  """Change the parameters of the cluster.
1537

1538
  """
1539
  HPATH = "cluster-modify"
1540
  HTYPE = constants.HTYPE_CLUSTER
1541
  _OP_REQP = []
1542
  REQ_BGL = False
1543

    
1544
  def CheckArguments(self):
1545
    """Check parameters
1546

1547
    """
1548
    if not hasattr(self.op, "candidate_pool_size"):
1549
      self.op.candidate_pool_size = None
1550
    if self.op.candidate_pool_size is not None:
1551
      try:
1552
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1553
      except (ValueError, TypeError), err:
1554
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1555
                                   str(err))
1556
      if self.op.candidate_pool_size < 1:
1557
        raise errors.OpPrereqError("At least one master candidate needed")
1558

    
1559
  def ExpandNames(self):
1560
    # FIXME: in the future maybe other cluster params won't require checking on
1561
    # all nodes to be modified.
1562
    self.needed_locks = {
1563
      locking.LEVEL_NODE: locking.ALL_SET,
1564
    }
1565
    self.share_locks[locking.LEVEL_NODE] = 1
1566

    
1567
  def BuildHooksEnv(self):
1568
    """Build hooks env.
1569

1570
    """
1571
    env = {
1572
      "OP_TARGET": self.cfg.GetClusterName(),
1573
      "NEW_VG_NAME": self.op.vg_name,
1574
      }
1575
    mn = self.cfg.GetMasterNode()
1576
    return env, [mn], [mn]
1577

    
1578
  def CheckPrereq(self):
1579
    """Check prerequisites.
1580

1581
    This checks whether the given params don't conflict and
1582
    if the given volume group is valid.
1583

1584
    """
1585
    if self.op.vg_name is not None and not self.op.vg_name:
1586
      instances = self.cfg.GetAllInstancesInfo().values()
1587
      for inst in instances:
1588
        for disk in inst.disks:
1589
          if _RecursiveCheckIfLVMBased(disk):
1590
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1591
                                       " lvm-based instances exist")
1592

    
1593
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1594

    
1595
    # if vg_name not None, checks given volume group on all nodes
1596
    if self.op.vg_name:
1597
      vglist = self.rpc.call_vg_list(node_list)
1598
      for node in node_list:
1599
        msg = vglist[node].fail_msg
1600
        if msg:
1601
          # ignoring down node
1602
          self.LogWarning("Error while gathering data on node %s"
1603
                          " (ignoring node): %s", node, msg)
1604
          continue
1605
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1606
                                              self.op.vg_name,
1607
                                              constants.MIN_VG_SIZE)
1608
        if vgstatus:
1609
          raise errors.OpPrereqError("Error on node '%s': %s" %
1610
                                     (node, vgstatus))
1611

    
1612
    self.cluster = cluster = self.cfg.GetClusterInfo()
1613
    # validate params changes
1614
    if self.op.beparams:
1615
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1616
      self.new_beparams = objects.FillDict(
1617
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1618

    
1619
    if self.op.nicparams:
1620
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1621
      self.new_nicparams = objects.FillDict(
1622
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1623
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1624

    
1625
    # hypervisor list/parameters
1626
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1627
    if self.op.hvparams:
1628
      if not isinstance(self.op.hvparams, dict):
1629
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1630
      for hv_name, hv_dict in self.op.hvparams.items():
1631
        if hv_name not in self.new_hvparams:
1632
          self.new_hvparams[hv_name] = hv_dict
1633
        else:
1634
          self.new_hvparams[hv_name].update(hv_dict)
1635

    
1636
    if self.op.enabled_hypervisors is not None:
1637
      self.hv_list = self.op.enabled_hypervisors
1638
      if not self.hv_list:
1639
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1640
                                   " least one member")
1641
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1642
      if invalid_hvs:
1643
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1644
                                   " entries: %s" % invalid_hvs)
1645
    else:
1646
      self.hv_list = cluster.enabled_hypervisors
1647

    
1648
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1649
      # either the enabled list has changed, or the parameters have, validate
1650
      for hv_name, hv_params in self.new_hvparams.items():
1651
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1652
            (self.op.enabled_hypervisors and
1653
             hv_name in self.op.enabled_hypervisors)):
1654
          # either this is a new hypervisor, or its parameters have changed
1655
          hv_class = hypervisor.GetHypervisor(hv_name)
1656
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1657
          hv_class.CheckParameterSyntax(hv_params)
1658
          _CheckHVParams(self, node_list, hv_name, hv_params)
1659

    
1660
  def Exec(self, feedback_fn):
1661
    """Change the parameters of the cluster.
1662

1663
    """
1664
    if self.op.vg_name is not None:
1665
      new_volume = self.op.vg_name
1666
      if not new_volume:
1667
        new_volume = None
1668
      if new_volume != self.cfg.GetVGName():
1669
        self.cfg.SetVGName(new_volume)
1670
      else:
1671
        feedback_fn("Cluster LVM configuration already in desired"
1672
                    " state, not changing")
1673
    if self.op.hvparams:
1674
      self.cluster.hvparams = self.new_hvparams
1675
    if self.op.enabled_hypervisors is not None:
1676
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1677
    if self.op.beparams:
1678
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1679
    if self.op.nicparams:
1680
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1681

    
1682
    if self.op.candidate_pool_size is not None:
1683
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1684
      # we need to update the pool size here, otherwise the save will fail
1685
      _AdjustCandidatePool(self)
1686

    
1687
    self.cfg.Update(self.cluster)
1688

    
1689

    
1690
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1691
  """Distribute additional files which are part of the cluster configuration.
1692

1693
  ConfigWriter takes care of distributing the config and ssconf files, but
1694
  there are more files which should be distributed to all nodes. This function
1695
  makes sure those are copied.
1696

1697
  @param lu: calling logical unit
1698
  @param additional_nodes: list of nodes not in the config to distribute to
1699

1700
  """
1701
  # 1. Gather target nodes
1702
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1703
  dist_nodes = lu.cfg.GetNodeList()
1704
  if additional_nodes is not None:
1705
    dist_nodes.extend(additional_nodes)
1706
  if myself.name in dist_nodes:
1707
    dist_nodes.remove(myself.name)
1708
  # 2. Gather files to distribute
1709
  dist_files = set([constants.ETC_HOSTS,
1710
                    constants.SSH_KNOWN_HOSTS_FILE,
1711
                    constants.RAPI_CERT_FILE,
1712
                    constants.RAPI_USERS_FILE,
1713
                    constants.HMAC_CLUSTER_KEY,
1714
                   ])
1715

    
1716
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1717
  for hv_name in enabled_hypervisors:
1718
    hv_class = hypervisor.GetHypervisor(hv_name)
1719
    dist_files.update(hv_class.GetAncillaryFiles())
1720

    
1721
  # 3. Perform the files upload
1722
  for fname in dist_files:
1723
    if os.path.exists(fname):
1724
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1725
      for to_node, to_result in result.items():
1726
        msg = to_result.fail_msg
1727
        if msg:
1728
          msg = ("Copy of file %s to node %s failed: %s" %
1729
                 (fname, to_node, msg))
1730
          lu.proc.LogWarning(msg)
1731

    
1732

    
1733
class LURedistributeConfig(NoHooksLU):
1734
  """Force the redistribution of cluster configuration.
1735

1736
  This is a very simple LU.
1737

1738
  """
1739
  _OP_REQP = []
1740
  REQ_BGL = False
1741

    
1742
  def ExpandNames(self):
1743
    self.needed_locks = {
1744
      locking.LEVEL_NODE: locking.ALL_SET,
1745
    }
1746
    self.share_locks[locking.LEVEL_NODE] = 1
1747

    
1748
  def CheckPrereq(self):
1749
    """Check prerequisites.
1750

1751
    """
1752

    
1753
  def Exec(self, feedback_fn):
1754
    """Redistribute the configuration.
1755

1756
    """
1757
    self.cfg.Update(self.cfg.GetClusterInfo())
1758
    _RedistributeAncillaryFiles(self)
1759

    
1760

    
1761
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1762
  """Sleep and poll for an instance's disk to sync.
1763

1764
  """
1765
  if not instance.disks:
1766
    return True
1767

    
1768
  if not oneshot:
1769
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1770

    
1771
  node = instance.primary_node
1772

    
1773
  for dev in instance.disks:
1774
    lu.cfg.SetDiskID(dev, node)
1775

    
1776
  retries = 0
1777
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1778
  while True:
1779
    max_time = 0
1780
    done = True
1781
    cumul_degraded = False
1782
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1783
    msg = rstats.fail_msg
1784
    if msg:
1785
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1786
      retries += 1
1787
      if retries >= 10:
1788
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1789
                                 " aborting." % node)
1790
      time.sleep(6)
1791
      continue
1792
    rstats = rstats.payload
1793
    retries = 0
1794
    for i, mstat in enumerate(rstats):
1795
      if mstat is None:
1796
        lu.LogWarning("Can't compute data for node %s/%s",
1797
                           node, instance.disks[i].iv_name)
1798
        continue
1799
      # we ignore the ldisk parameter
1800
      perc_done, est_time, is_degraded, _ = mstat
1801
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1802
      if perc_done is not None:
1803
        done = False
1804
        if est_time is not None:
1805
          rem_time = "%d estimated seconds remaining" % est_time
1806
          max_time = est_time
1807
        else:
1808
          rem_time = "no time estimate"
1809
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1810
                        (instance.disks[i].iv_name, perc_done, rem_time))
1811

    
1812
    # if we're done but degraded, let's do a few small retries, to
1813
    # make sure we see a stable and not transient situation; therefore
1814
    # we force restart of the loop
1815
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1816
      logging.info("Degraded disks found, %d retries left", degr_retries)
1817
      degr_retries -= 1
1818
      time.sleep(1)
1819
      continue
1820

    
1821
    if done or oneshot:
1822
      break
1823

    
1824
    time.sleep(min(60, max_time))
1825

    
1826
  if done:
1827
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1828
  return not cumul_degraded
1829

    
1830

    
1831
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1832
  """Check that mirrors are not degraded.
1833

1834
  The ldisk parameter, if True, will change the test from the
1835
  is_degraded attribute (which represents overall non-ok status for
1836
  the device(s)) to the ldisk (representing the local storage status).
1837

1838
  """
1839
  lu.cfg.SetDiskID(dev, node)
1840
  if ldisk:
1841
    idx = 6
1842
  else:
1843
    idx = 5
1844

    
1845
  result = True
1846
  if on_primary or dev.AssembleOnSecondary():
1847
    rstats = lu.rpc.call_blockdev_find(node, dev)
1848
    msg = rstats.fail_msg
1849
    if msg:
1850
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1851
      result = False
1852
    elif not rstats.payload:
1853
      lu.LogWarning("Can't find disk on node %s", node)
1854
      result = False
1855
    else:
1856
      result = result and (not rstats.payload[idx])
1857
  if dev.children:
1858
    for child in dev.children:
1859
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1860

    
1861
  return result
1862

    
1863

    
1864
class LUDiagnoseOS(NoHooksLU):
1865
  """Logical unit for OS diagnose/query.
1866

1867
  """
1868
  _OP_REQP = ["output_fields", "names"]
1869
  REQ_BGL = False
1870
  _FIELDS_STATIC = utils.FieldSet()
1871
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1872

    
1873
  def ExpandNames(self):
1874
    if self.op.names:
1875
      raise errors.OpPrereqError("Selective OS query not supported")
1876

    
1877
    _CheckOutputFields(static=self._FIELDS_STATIC,
1878
                       dynamic=self._FIELDS_DYNAMIC,
1879
                       selected=self.op.output_fields)
1880

    
1881
    # Lock all nodes, in shared mode
1882
    # Temporary removal of locks, should be reverted later
1883
    # TODO: reintroduce locks when they are lighter-weight
1884
    self.needed_locks = {}
1885
    #self.share_locks[locking.LEVEL_NODE] = 1
1886
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1887

    
1888
  def CheckPrereq(self):
1889
    """Check prerequisites.
1890

1891
    """
1892

    
1893
  @staticmethod
1894
  def _DiagnoseByOS(node_list, rlist):
1895
    """Remaps a per-node return list into an a per-os per-node dictionary
1896

1897
    @param node_list: a list with the names of all nodes
1898
    @param rlist: a map with node names as keys and OS objects as values
1899

1900
    @rtype: dict
1901
    @return: a dictionary with osnames as keys and as value another map, with
1902
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
1903

1904
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
1905
                                     (/srv/..., False, "invalid api")],
1906
                           "node2": [(/srv/..., True, "")]}
1907
          }
1908

1909
    """
1910
    all_os = {}
1911
    # we build here the list of nodes that didn't fail the RPC (at RPC
1912
    # level), so that nodes with a non-responding node daemon don't
1913
    # make all OSes invalid
1914
    good_nodes = [node_name for node_name in rlist
1915
                  if not rlist[node_name].fail_msg]
1916
    for node_name, nr in rlist.items():
1917
      if nr.fail_msg or not nr.payload:
1918
        continue
1919
      for name, path, status, diagnose in nr.payload:
1920
        if name not in all_os:
1921
          # build a list of nodes for this os containing empty lists
1922
          # for each node in node_list
1923
          all_os[name] = {}
1924
          for nname in good_nodes:
1925
            all_os[name][nname] = []
1926
        all_os[name][node_name].append((path, status, diagnose))
1927
    return all_os
1928

    
1929
  def Exec(self, feedback_fn):
1930
    """Compute the list of OSes.
1931

1932
    """
1933
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1934
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1935
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1936
    output = []
1937
    for os_name, os_data in pol.items():
1938
      row = []
1939
      for field in self.op.output_fields:
1940
        if field == "name":
1941
          val = os_name
1942
        elif field == "valid":
1943
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
1944
        elif field == "node_status":
1945
          # this is just a copy of the dict
1946
          val = {}
1947
          for node_name, nos_list in os_data.items():
1948
            val[node_name] = nos_list
1949
        else:
1950
          raise errors.ParameterError(field)
1951
        row.append(val)
1952
      output.append(row)
1953

    
1954
    return output
1955

    
1956

    
1957
class LURemoveNode(LogicalUnit):
1958
  """Logical unit for removing a node.
1959

1960
  """
1961
  HPATH = "node-remove"
1962
  HTYPE = constants.HTYPE_NODE
1963
  _OP_REQP = ["node_name"]
1964

    
1965
  def BuildHooksEnv(self):
1966
    """Build hooks env.
1967

1968
    This doesn't run on the target node in the pre phase as a failed
1969
    node would then be impossible to remove.
1970

1971
    """
1972
    env = {
1973
      "OP_TARGET": self.op.node_name,
1974
      "NODE_NAME": self.op.node_name,
1975
      }
1976
    all_nodes = self.cfg.GetNodeList()
1977
    all_nodes.remove(self.op.node_name)
1978
    return env, all_nodes, all_nodes
1979

    
1980
  def CheckPrereq(self):
1981
    """Check prerequisites.
1982

1983
    This checks:
1984
     - the node exists in the configuration
1985
     - it does not have primary or secondary instances
1986
     - it's not the master
1987

1988
    Any errors are signaled by raising errors.OpPrereqError.
1989

1990
    """
1991
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1992
    if node is None:
1993
      raise errors.OpPrereqError, ("Node '%s' is unknown." % self.op.node_name)
1994

    
1995
    instance_list = self.cfg.GetInstanceList()
1996

    
1997
    masternode = self.cfg.GetMasterNode()
1998
    if node.name == masternode:
1999
      raise errors.OpPrereqError("Node is the master node,"
2000
                                 " you need to failover first.")
2001

    
2002
    for instance_name in instance_list:
2003
      instance = self.cfg.GetInstanceInfo(instance_name)
2004
      if node.name in instance.all_nodes:
2005
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2006
                                   " please remove first." % instance_name)
2007
    self.op.node_name = node.name
2008
    self.node = node
2009

    
2010
  def Exec(self, feedback_fn):
2011
    """Removes the node from the cluster.
2012

2013
    """
2014
    node = self.node
2015
    logging.info("Stopping the node daemon and removing configs from node %s",
2016
                 node.name)
2017

    
2018
    self.context.RemoveNode(node.name)
2019

    
2020
    result = self.rpc.call_node_leave_cluster(node.name)
2021
    msg = result.fail_msg
2022
    if msg:
2023
      self.LogWarning("Errors encountered on the remote node while leaving"
2024
                      " the cluster: %s", msg)
2025

    
2026
    # Promote nodes to master candidate as needed
2027
    _AdjustCandidatePool(self)
2028

    
2029

    
2030
class LUQueryNodes(NoHooksLU):
2031
  """Logical unit for querying nodes.
2032

2033
  """
2034
  _OP_REQP = ["output_fields", "names", "use_locking"]
2035
  REQ_BGL = False
2036
  _FIELDS_DYNAMIC = utils.FieldSet(
2037
    "dtotal", "dfree",
2038
    "mtotal", "mnode", "mfree",
2039
    "bootid",
2040
    "ctotal", "cnodes", "csockets",
2041
    )
2042

    
2043
  _FIELDS_STATIC = utils.FieldSet(
2044
    "name", "pinst_cnt", "sinst_cnt",
2045
    "pinst_list", "sinst_list",
2046
    "pip", "sip", "tags",
2047
    "serial_no",
2048
    "master_candidate",
2049
    "master",
2050
    "offline",
2051
    "drained",
2052
    "role",
2053
    )
2054

    
2055
  def ExpandNames(self):
2056
    _CheckOutputFields(static=self._FIELDS_STATIC,
2057
                       dynamic=self._FIELDS_DYNAMIC,
2058
                       selected=self.op.output_fields)
2059

    
2060
    self.needed_locks = {}
2061
    self.share_locks[locking.LEVEL_NODE] = 1
2062

    
2063
    if self.op.names:
2064
      self.wanted = _GetWantedNodes(self, self.op.names)
2065
    else:
2066
      self.wanted = locking.ALL_SET
2067

    
2068
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2069
    self.do_locking = self.do_node_query and self.op.use_locking
2070
    if self.do_locking:
2071
      # if we don't request only static fields, we need to lock the nodes
2072
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2073

    
2074

    
2075
  def CheckPrereq(self):
2076
    """Check prerequisites.
2077

2078
    """
2079
    # The validation of the node list is done in the _GetWantedNodes,
2080
    # if non empty, and if empty, there's no validation to do
2081
    pass
2082

    
2083
  def Exec(self, feedback_fn):
2084
    """Computes the list of nodes and their attributes.
2085

2086
    """
2087
    all_info = self.cfg.GetAllNodesInfo()
2088
    if self.do_locking:
2089
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2090
    elif self.wanted != locking.ALL_SET:
2091
      nodenames = self.wanted
2092
      missing = set(nodenames).difference(all_info.keys())
2093
      if missing:
2094
        raise errors.OpExecError(
2095
          "Some nodes were removed before retrieving their data: %s" % missing)
2096
    else:
2097
      nodenames = all_info.keys()
2098

    
2099
    nodenames = utils.NiceSort(nodenames)
2100
    nodelist = [all_info[name] for name in nodenames]
2101

    
2102
    # begin data gathering
2103

    
2104
    if self.do_node_query:
2105
      live_data = {}
2106
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2107
                                          self.cfg.GetHypervisorType())
2108
      for name in nodenames:
2109
        nodeinfo = node_data[name]
2110
        if not nodeinfo.fail_msg and nodeinfo.payload:
2111
          nodeinfo = nodeinfo.payload
2112
          fn = utils.TryConvert
2113
          live_data[name] = {
2114
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2115
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2116
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2117
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2118
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2119
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2120
            "bootid": nodeinfo.get('bootid', None),
2121
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2122
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2123
            }
2124
        else:
2125
          live_data[name] = {}
2126
    else:
2127
      live_data = dict.fromkeys(nodenames, {})
2128

    
2129
    node_to_primary = dict([(name, set()) for name in nodenames])
2130
    node_to_secondary = dict([(name, set()) for name in nodenames])
2131

    
2132
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2133
                             "sinst_cnt", "sinst_list"))
2134
    if inst_fields & frozenset(self.op.output_fields):
2135
      instancelist = self.cfg.GetInstanceList()
2136

    
2137
      for instance_name in instancelist:
2138
        inst = self.cfg.GetInstanceInfo(instance_name)
2139
        if inst.primary_node in node_to_primary:
2140
          node_to_primary[inst.primary_node].add(inst.name)
2141
        for secnode in inst.secondary_nodes:
2142
          if secnode in node_to_secondary:
2143
            node_to_secondary[secnode].add(inst.name)
2144

    
2145
    master_node = self.cfg.GetMasterNode()
2146

    
2147
    # end data gathering
2148

    
2149
    output = []
2150
    for node in nodelist:
2151
      node_output = []
2152
      for field in self.op.output_fields:
2153
        if field == "name":
2154
          val = node.name
2155
        elif field == "pinst_list":
2156
          val = list(node_to_primary[node.name])
2157
        elif field == "sinst_list":
2158
          val = list(node_to_secondary[node.name])
2159
        elif field == "pinst_cnt":
2160
          val = len(node_to_primary[node.name])
2161
        elif field == "sinst_cnt":
2162
          val = len(node_to_secondary[node.name])
2163
        elif field == "pip":
2164
          val = node.primary_ip
2165
        elif field == "sip":
2166
          val = node.secondary_ip
2167
        elif field == "tags":
2168
          val = list(node.GetTags())
2169
        elif field == "serial_no":
2170
          val = node.serial_no
2171
        elif field == "master_candidate":
2172
          val = node.master_candidate
2173
        elif field == "master":
2174
          val = node.name == master_node
2175
        elif field == "offline":
2176
          val = node.offline
2177
        elif field == "drained":
2178
          val = node.drained
2179
        elif self._FIELDS_DYNAMIC.Matches(field):
2180
          val = live_data[node.name].get(field, None)
2181
        elif field == "role":
2182
          if node.name == master_node:
2183
            val = "M"
2184
          elif node.master_candidate:
2185
            val = "C"
2186
          elif node.drained:
2187
            val = "D"
2188
          elif node.offline:
2189
            val = "O"
2190
          else:
2191
            val = "R"
2192
        else:
2193
          raise errors.ParameterError(field)
2194
        node_output.append(val)
2195
      output.append(node_output)
2196

    
2197
    return output
2198

    
2199

    
2200
class LUQueryNodeVolumes(NoHooksLU):
2201
  """Logical unit for getting volumes on node(s).
2202

2203
  """
2204
  _OP_REQP = ["nodes", "output_fields"]
2205
  REQ_BGL = False
2206
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2207
  _FIELDS_STATIC = utils.FieldSet("node")
2208

    
2209
  def ExpandNames(self):
2210
    _CheckOutputFields(static=self._FIELDS_STATIC,
2211
                       dynamic=self._FIELDS_DYNAMIC,
2212
                       selected=self.op.output_fields)
2213

    
2214
    self.needed_locks = {}
2215
    self.share_locks[locking.LEVEL_NODE] = 1
2216
    if not self.op.nodes:
2217
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2218
    else:
2219
      self.needed_locks[locking.LEVEL_NODE] = \
2220
        _GetWantedNodes(self, self.op.nodes)
2221

    
2222
  def CheckPrereq(self):
2223
    """Check prerequisites.
2224

2225
    This checks that the fields required are valid output fields.
2226

2227
    """
2228
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2229

    
2230
  def Exec(self, feedback_fn):
2231
    """Computes the list of nodes and their attributes.
2232

2233
    """
2234
    nodenames = self.nodes
2235
    volumes = self.rpc.call_node_volumes(nodenames)
2236

    
2237
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2238
             in self.cfg.GetInstanceList()]
2239

    
2240
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2241

    
2242
    output = []
2243
    for node in nodenames:
2244
      nresult = volumes[node]
2245
      if nresult.offline:
2246
        continue
2247
      msg = nresult.fail_msg
2248
      if msg:
2249
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2250
        continue
2251

    
2252
      node_vols = nresult.payload[:]
2253
      node_vols.sort(key=lambda vol: vol['dev'])
2254

    
2255
      for vol in node_vols:
2256
        node_output = []
2257
        for field in self.op.output_fields:
2258
          if field == "node":
2259
            val = node
2260
          elif field == "phys":
2261
            val = vol['dev']
2262
          elif field == "vg":
2263
            val = vol['vg']
2264
          elif field == "name":
2265
            val = vol['name']
2266
          elif field == "size":
2267
            val = int(float(vol['size']))
2268
          elif field == "instance":
2269
            for inst in ilist:
2270
              if node not in lv_by_node[inst]:
2271
                continue
2272
              if vol['name'] in lv_by_node[inst][node]:
2273
                val = inst.name
2274
                break
2275
            else:
2276
              val = '-'
2277
          else:
2278
            raise errors.ParameterError(field)
2279
          node_output.append(str(val))
2280

    
2281
        output.append(node_output)
2282

    
2283
    return output
2284

    
2285

    
2286
class LUAddNode(LogicalUnit):
2287
  """Logical unit for adding node to the cluster.
2288

2289
  """
2290
  HPATH = "node-add"
2291
  HTYPE = constants.HTYPE_NODE
2292
  _OP_REQP = ["node_name"]
2293

    
2294
  def BuildHooksEnv(self):
2295
    """Build hooks env.
2296

2297
    This will run on all nodes before, and on all nodes + the new node after.
2298

2299
    """
2300
    env = {
2301
      "OP_TARGET": self.op.node_name,
2302
      "NODE_NAME": self.op.node_name,
2303
      "NODE_PIP": self.op.primary_ip,
2304
      "NODE_SIP": self.op.secondary_ip,
2305
      }
2306
    nodes_0 = self.cfg.GetNodeList()
2307
    nodes_1 = nodes_0 + [self.op.node_name, ]
2308
    return env, nodes_0, nodes_1
2309

    
2310
  def CheckPrereq(self):
2311
    """Check prerequisites.
2312

2313
    This checks:
2314
     - the new node is not already in the config
2315
     - it is resolvable
2316
     - its parameters (single/dual homed) matches the cluster
2317

2318
    Any errors are signaled by raising errors.OpPrereqError.
2319

2320
    """
2321
    node_name = self.op.node_name
2322
    cfg = self.cfg
2323

    
2324
    dns_data = utils.HostInfo(node_name)
2325

    
2326
    node = dns_data.name
2327
    primary_ip = self.op.primary_ip = dns_data.ip
2328
    secondary_ip = getattr(self.op, "secondary_ip", None)
2329
    if secondary_ip is None:
2330
      secondary_ip = primary_ip
2331
    if not utils.IsValidIP(secondary_ip):
2332
      raise errors.OpPrereqError("Invalid secondary IP given")
2333
    self.op.secondary_ip = secondary_ip
2334

    
2335
    node_list = cfg.GetNodeList()
2336
    if not self.op.readd and node in node_list:
2337
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2338
                                 node)
2339
    elif self.op.readd and node not in node_list:
2340
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2341

    
2342
    for existing_node_name in node_list:
2343
      existing_node = cfg.GetNodeInfo(existing_node_name)
2344

    
2345
      if self.op.readd and node == existing_node_name:
2346
        if (existing_node.primary_ip != primary_ip or
2347
            existing_node.secondary_ip != secondary_ip):
2348
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2349
                                     " address configuration as before")
2350
        continue
2351

    
2352
      if (existing_node.primary_ip == primary_ip or
2353
          existing_node.secondary_ip == primary_ip or
2354
          existing_node.primary_ip == secondary_ip or
2355
          existing_node.secondary_ip == secondary_ip):
2356
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2357
                                   " existing node %s" % existing_node.name)
2358

    
2359
    # check that the type of the node (single versus dual homed) is the
2360
    # same as for the master
2361
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2362
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2363
    newbie_singlehomed = secondary_ip == primary_ip
2364
    if master_singlehomed != newbie_singlehomed:
2365
      if master_singlehomed:
2366
        raise errors.OpPrereqError("The master has no private ip but the"
2367
                                   " new node has one")
2368
      else:
2369
        raise errors.OpPrereqError("The master has a private ip but the"
2370
                                   " new node doesn't have one")
2371

    
2372
    # checks reachability
2373
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2374
      raise errors.OpPrereqError("Node not reachable by ping")
2375

    
2376
    if not newbie_singlehomed:
2377
      # check reachability from my secondary ip to newbie's secondary ip
2378
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2379
                           source=myself.secondary_ip):
2380
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2381
                                   " based ping to noded port")
2382

    
2383
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2384
    if self.op.readd:
2385
      exceptions = [node]
2386
    else:
2387
      exceptions = []
2388
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2389
    # the new node will increase mc_max with one, so:
2390
    mc_max = min(mc_max + 1, cp_size)
2391
    self.master_candidate = mc_now < mc_max
2392

    
2393
    if self.op.readd:
2394
      self.new_node = self.cfg.GetNodeInfo(node)
2395
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2396
    else:
2397
      self.new_node = objects.Node(name=node,
2398
                                   primary_ip=primary_ip,
2399
                                   secondary_ip=secondary_ip,
2400
                                   master_candidate=self.master_candidate,
2401
                                   offline=False, drained=False)
2402

    
2403
  def Exec(self, feedback_fn):
2404
    """Adds the new node to the cluster.
2405

2406
    """
2407
    new_node = self.new_node
2408
    node = new_node.name
2409

    
2410
    # for re-adds, reset the offline/drained/master-candidate flags;
2411
    # we need to reset here, otherwise offline would prevent RPC calls
2412
    # later in the procedure; this also means that if the re-add
2413
    # fails, we are left with a non-offlined, broken node
2414
    if self.op.readd:
2415
      new_node.drained = new_node.offline = False
2416
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2417
      # if we demote the node, we do cleanup later in the procedure
2418
      new_node.master_candidate = self.master_candidate
2419

    
2420
    # notify the user about any possible mc promotion
2421
    if new_node.master_candidate:
2422
      self.LogInfo("Node will be a master candidate")
2423

    
2424
    # check connectivity
2425
    result = self.rpc.call_version([node])[node]
2426
    result.Raise("Can't get version information from node %s" % node)
2427
    if constants.PROTOCOL_VERSION == result.payload:
2428
      logging.info("Communication to node %s fine, sw version %s match",
2429
                   node, result.payload)
2430
    else:
2431
      raise errors.OpExecError("Version mismatch master version %s,"
2432
                               " node version %s" %
2433
                               (constants.PROTOCOL_VERSION, result.payload))
2434

    
2435
    # setup ssh on node
2436
    logging.info("Copy ssh key to node %s", node)
2437
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2438
    keyarray = []
2439
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2440
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2441
                priv_key, pub_key]
2442

    
2443
    for i in keyfiles:
2444
      f = open(i, 'r')
2445
      try:
2446
        keyarray.append(f.read())
2447
      finally:
2448
        f.close()
2449

    
2450
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2451
                                    keyarray[2],
2452
                                    keyarray[3], keyarray[4], keyarray[5])
2453
    result.Raise("Cannot transfer ssh keys to the new node")
2454

    
2455
    # Add node to our /etc/hosts, and add key to known_hosts
2456
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2457
      utils.AddHostToEtcHosts(new_node.name)
2458

    
2459
    if new_node.secondary_ip != new_node.primary_ip:
2460
      result = self.rpc.call_node_has_ip_address(new_node.name,
2461
                                                 new_node.secondary_ip)
2462
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2463
                   prereq=True)
2464
      if not result.payload:
2465
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2466
                                 " you gave (%s). Please fix and re-run this"
2467
                                 " command." % new_node.secondary_ip)
2468

    
2469
    node_verify_list = [self.cfg.GetMasterNode()]
2470
    node_verify_param = {
2471
      'nodelist': [node],
2472
      # TODO: do a node-net-test as well?
2473
    }
2474

    
2475
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2476
                                       self.cfg.GetClusterName())
2477
    for verifier in node_verify_list:
2478
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2479
      nl_payload = result[verifier].payload['nodelist']
2480
      if nl_payload:
2481
        for failed in nl_payload:
2482
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2483
                      (verifier, nl_payload[failed]))
2484
        raise errors.OpExecError("ssh/hostname verification failed.")
2485

    
2486
    if self.op.readd:
2487
      _RedistributeAncillaryFiles(self)
2488
      self.context.ReaddNode(new_node)
2489
      # make sure we redistribute the config
2490
      self.cfg.Update(new_node)
2491
      # and make sure the new node will not have old files around
2492
      if not new_node.master_candidate:
2493
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2494
        msg = result.RemoteFailMsg()
2495
        if msg:
2496
          self.LogWarning("Node failed to demote itself from master"
2497
                          " candidate status: %s" % msg)
2498
    else:
2499
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2500
      self.context.AddNode(new_node)
2501

    
2502

    
2503
class LUSetNodeParams(LogicalUnit):
2504
  """Modifies the parameters of a node.
2505

2506
  """
2507
  HPATH = "node-modify"
2508
  HTYPE = constants.HTYPE_NODE
2509
  _OP_REQP = ["node_name"]
2510
  REQ_BGL = False
2511

    
2512
  def CheckArguments(self):
2513
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2514
    if node_name is None:
2515
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2516
    self.op.node_name = node_name
2517
    _CheckBooleanOpField(self.op, 'master_candidate')
2518
    _CheckBooleanOpField(self.op, 'offline')
2519
    _CheckBooleanOpField(self.op, 'drained')
2520
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2521
    if all_mods.count(None) == 3:
2522
      raise errors.OpPrereqError("Please pass at least one modification")
2523
    if all_mods.count(True) > 1:
2524
      raise errors.OpPrereqError("Can't set the node into more than one"
2525
                                 " state at the same time")
2526

    
2527
  def ExpandNames(self):
2528
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2529

    
2530
  def BuildHooksEnv(self):
2531
    """Build hooks env.
2532

2533
    This runs on the master node.
2534

2535
    """
2536
    env = {
2537
      "OP_TARGET": self.op.node_name,
2538
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2539
      "OFFLINE": str(self.op.offline),
2540
      "DRAINED": str(self.op.drained),
2541
      }
2542
    nl = [self.cfg.GetMasterNode(),
2543
          self.op.node_name]
2544
    return env, nl, nl
2545

    
2546
  def CheckPrereq(self):
2547
    """Check prerequisites.
2548

2549
    This only checks the instance list against the existing names.
2550

2551
    """
2552
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2553

    
2554
    if ((self.op.master_candidate == False or self.op.offline == True or
2555
         self.op.drained == True) and node.master_candidate):
2556
      # we will demote the node from master_candidate
2557
      if self.op.node_name == self.cfg.GetMasterNode():
2558
        raise errors.OpPrereqError("The master node has to be a"
2559
                                   " master candidate, online and not drained")
2560
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2561
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2562
      if num_candidates <= cp_size:
2563
        msg = ("Not enough master candidates (desired"
2564
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2565
        if self.op.force:
2566
          self.LogWarning(msg)
2567
        else:
2568
          raise errors.OpPrereqError(msg)
2569

    
2570
    if (self.op.master_candidate == True and
2571
        ((node.offline and not self.op.offline == False) or
2572
         (node.drained and not self.op.drained == False))):
2573
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2574
                                 " to master_candidate" % node.name)
2575

    
2576
    return
2577

    
2578
  def Exec(self, feedback_fn):
2579
    """Modifies a node.
2580

2581
    """
2582
    node = self.node
2583

    
2584
    result = []
2585
    changed_mc = False
2586

    
2587
    if self.op.offline is not None:
2588
      node.offline = self.op.offline
2589
      result.append(("offline", str(self.op.offline)))
2590
      if self.op.offline == True:
2591
        if node.master_candidate:
2592
          node.master_candidate = False
2593
          changed_mc = True
2594
          result.append(("master_candidate", "auto-demotion due to offline"))
2595
        if node.drained:
2596
          node.drained = False
2597
          result.append(("drained", "clear drained status due to offline"))
2598

    
2599
    if self.op.master_candidate is not None:
2600
      node.master_candidate = self.op.master_candidate
2601
      changed_mc = True
2602
      result.append(("master_candidate", str(self.op.master_candidate)))
2603
      if self.op.master_candidate == False:
2604
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2605
        msg = rrc.fail_msg
2606
        if msg:
2607
          self.LogWarning("Node failed to demote itself: %s" % msg)
2608

    
2609
    if self.op.drained is not None:
2610
      node.drained = self.op.drained
2611
      result.append(("drained", str(self.op.drained)))
2612
      if self.op.drained == True:
2613
        if node.master_candidate:
2614
          node.master_candidate = False
2615
          changed_mc = True
2616
          result.append(("master_candidate", "auto-demotion due to drain"))
2617
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2618
          msg = rrc.RemoteFailMsg()
2619
          if msg:
2620
            self.LogWarning("Node failed to demote itself: %s" % msg)
2621
        if node.offline:
2622
          node.offline = False
2623
          result.append(("offline", "clear offline status due to drain"))
2624

    
2625
    # this will trigger configuration file update, if needed
2626
    self.cfg.Update(node)
2627
    # this will trigger job queue propagation or cleanup
2628
    if changed_mc:
2629
      self.context.ReaddNode(node)
2630

    
2631
    return result
2632

    
2633

    
2634
class LUPowercycleNode(NoHooksLU):
2635
  """Powercycles a node.
2636

2637
  """
2638
  _OP_REQP = ["node_name", "force"]
2639
  REQ_BGL = False
2640

    
2641
  def CheckArguments(self):
2642
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2643
    if node_name is None:
2644
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2645
    self.op.node_name = node_name
2646
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2647
      raise errors.OpPrereqError("The node is the master and the force"
2648
                                 " parameter was not set")
2649

    
2650
  def ExpandNames(self):
2651
    """Locking for PowercycleNode.
2652

2653
    This is a last-resource option and shouldn't block on other
2654
    jobs. Therefore, we grab no locks.
2655

2656
    """
2657
    self.needed_locks = {}
2658

    
2659
  def CheckPrereq(self):
2660
    """Check prerequisites.
2661

2662
    This LU has no prereqs.
2663

2664
    """
2665
    pass
2666

    
2667
  def Exec(self, feedback_fn):
2668
    """Reboots a node.
2669

2670
    """
2671
    result = self.rpc.call_node_powercycle(self.op.node_name,
2672
                                           self.cfg.GetHypervisorType())
2673
    result.Raise("Failed to schedule the reboot")
2674
    return result.payload
2675

    
2676

    
2677
class LUQueryClusterInfo(NoHooksLU):
2678
  """Query cluster configuration.
2679

2680
  """
2681
  _OP_REQP = []
2682
  REQ_BGL = False
2683

    
2684
  def ExpandNames(self):
2685
    self.needed_locks = {}
2686

    
2687
  def CheckPrereq(self):
2688
    """No prerequsites needed for this LU.
2689

2690
    """
2691
    pass
2692

    
2693
  def Exec(self, feedback_fn):
2694
    """Return cluster config.
2695

2696
    """
2697
    cluster = self.cfg.GetClusterInfo()
2698
    result = {
2699
      "software_version": constants.RELEASE_VERSION,
2700
      "protocol_version": constants.PROTOCOL_VERSION,
2701
      "config_version": constants.CONFIG_VERSION,
2702
      "os_api_version": max(constants.OS_API_VERSIONS),
2703
      "export_version": constants.EXPORT_VERSION,
2704
      "architecture": (platform.architecture()[0], platform.machine()),
2705
      "name": cluster.cluster_name,
2706
      "master": cluster.master_node,
2707
      "default_hypervisor": cluster.enabled_hypervisors[0],
2708
      "enabled_hypervisors": cluster.enabled_hypervisors,
2709
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
2710
                        for hypervisor_name in cluster.enabled_hypervisors]),
2711
      "beparams": cluster.beparams,
2712
      "nicparams": cluster.nicparams,
2713
      "candidate_pool_size": cluster.candidate_pool_size,
2714
      "master_netdev": cluster.master_netdev,
2715
      "volume_group_name": cluster.volume_group_name,
2716
      "file_storage_dir": cluster.file_storage_dir,
2717
      }
2718

    
2719
    return result
2720

    
2721

    
2722
class LUQueryConfigValues(NoHooksLU):
2723
  """Return configuration values.
2724

2725
  """
2726
  _OP_REQP = []
2727
  REQ_BGL = False
2728
  _FIELDS_DYNAMIC = utils.FieldSet()
2729
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2730

    
2731
  def ExpandNames(self):
2732
    self.needed_locks = {}
2733

    
2734
    _CheckOutputFields(static=self._FIELDS_STATIC,
2735
                       dynamic=self._FIELDS_DYNAMIC,
2736
                       selected=self.op.output_fields)
2737

    
2738
  def CheckPrereq(self):
2739
    """No prerequisites.
2740

2741
    """
2742
    pass
2743

    
2744
  def Exec(self, feedback_fn):
2745
    """Dump a representation of the cluster config to the standard output.
2746

2747
    """
2748
    values = []
2749
    for field in self.op.output_fields:
2750
      if field == "cluster_name":
2751
        entry = self.cfg.GetClusterName()
2752
      elif field == "master_node":
2753
        entry = self.cfg.GetMasterNode()
2754
      elif field == "drain_flag":
2755
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2756
      else:
2757
        raise errors.ParameterError(field)
2758
      values.append(entry)
2759
    return values
2760

    
2761

    
2762
class LUActivateInstanceDisks(NoHooksLU):
2763
  """Bring up an instance's disks.
2764

2765
  """
2766
  _OP_REQP = ["instance_name"]
2767
  REQ_BGL = False
2768

    
2769
  def ExpandNames(self):
2770
    self._ExpandAndLockInstance()
2771
    self.needed_locks[locking.LEVEL_NODE] = []
2772
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2773

    
2774
  def DeclareLocks(self, level):
2775
    if level == locking.LEVEL_NODE:
2776
      self._LockInstancesNodes()
2777

    
2778
  def CheckPrereq(self):
2779
    """Check prerequisites.
2780

2781
    This checks that the instance is in the cluster.
2782

2783
    """
2784
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2785
    assert self.instance is not None, \
2786
      "Cannot retrieve locked instance %s" % self.op.instance_name
2787
    _CheckNodeOnline(self, self.instance.primary_node)
2788

    
2789
  def Exec(self, feedback_fn):
2790
    """Activate the disks.
2791

2792
    """
2793
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2794
    if not disks_ok:
2795
      raise errors.OpExecError("Cannot activate block devices")
2796

    
2797
    return disks_info
2798

    
2799

    
2800
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2801
  """Prepare the block devices for an instance.
2802

2803
  This sets up the block devices on all nodes.
2804

2805
  @type lu: L{LogicalUnit}
2806
  @param lu: the logical unit on whose behalf we execute
2807
  @type instance: L{objects.Instance}
2808
  @param instance: the instance for whose disks we assemble
2809
  @type ignore_secondaries: boolean
2810
  @param ignore_secondaries: if true, errors on secondary nodes
2811
      won't result in an error return from the function
2812
  @return: False if the operation failed, otherwise a list of
2813
      (host, instance_visible_name, node_visible_name)
2814
      with the mapping from node devices to instance devices
2815

2816
  """
2817
  device_info = []
2818
  disks_ok = True
2819
  iname = instance.name
2820
  # With the two passes mechanism we try to reduce the window of
2821
  # opportunity for the race condition of switching DRBD to primary
2822
  # before handshaking occured, but we do not eliminate it
2823

    
2824
  # The proper fix would be to wait (with some limits) until the
2825
  # connection has been made and drbd transitions from WFConnection
2826
  # into any other network-connected state (Connected, SyncTarget,
2827
  # SyncSource, etc.)
2828

    
2829
  # 1st pass, assemble on all nodes in secondary mode
2830
  for inst_disk in instance.disks:
2831
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2832
      lu.cfg.SetDiskID(node_disk, node)
2833
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2834
      msg = result.fail_msg
2835
      if msg:
2836
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2837
                           " (is_primary=False, pass=1): %s",
2838
                           inst_disk.iv_name, node, msg)
2839
        if not ignore_secondaries:
2840
          disks_ok = False
2841

    
2842
  # FIXME: race condition on drbd migration to primary
2843

    
2844
  # 2nd pass, do only the primary node
2845
  for inst_disk in instance.disks:
2846
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2847
      if node != instance.primary_node:
2848
        continue
2849
      lu.cfg.SetDiskID(node_disk, node)
2850
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2851
      msg = result.fail_msg
2852
      if msg:
2853
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2854
                           " (is_primary=True, pass=2): %s",
2855
                           inst_disk.iv_name, node, msg)
2856
        disks_ok = False
2857
    device_info.append((instance.primary_node, inst_disk.iv_name,
2858
                        result.payload))
2859

    
2860
  # leave the disks configured for the primary node
2861
  # this is a workaround that would be fixed better by
2862
  # improving the logical/physical id handling
2863
  for disk in instance.disks:
2864
    lu.cfg.SetDiskID(disk, instance.primary_node)
2865

    
2866
  return disks_ok, device_info
2867

    
2868

    
2869
def _StartInstanceDisks(lu, instance, force):
2870
  """Start the disks of an instance.
2871

2872
  """
2873
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
2874
                                           ignore_secondaries=force)
2875
  if not disks_ok:
2876
    _ShutdownInstanceDisks(lu, instance)
2877
    if force is not None and not force:
2878
      lu.proc.LogWarning("", hint="If the message above refers to a"
2879
                         " secondary node,"
2880
                         " you can retry the operation using '--force'.")
2881
    raise errors.OpExecError("Disk consistency error")
2882

    
2883

    
2884
class LUDeactivateInstanceDisks(NoHooksLU):
2885
  """Shutdown an instance's disks.
2886

2887
  """
2888
  _OP_REQP = ["instance_name"]
2889
  REQ_BGL = False
2890

    
2891
  def ExpandNames(self):
2892
    self._ExpandAndLockInstance()
2893
    self.needed_locks[locking.LEVEL_NODE] = []
2894
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2895

    
2896
  def DeclareLocks(self, level):
2897
    if level == locking.LEVEL_NODE:
2898
      self._LockInstancesNodes()
2899

    
2900
  def CheckPrereq(self):
2901
    """Check prerequisites.
2902

2903
    This checks that the instance is in the cluster.
2904

2905
    """
2906
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2907
    assert self.instance is not None, \
2908
      "Cannot retrieve locked instance %s" % self.op.instance_name
2909

    
2910
  def Exec(self, feedback_fn):
2911
    """Deactivate the disks
2912

2913
    """
2914
    instance = self.instance
2915
    _SafeShutdownInstanceDisks(self, instance)
2916

    
2917

    
2918
def _SafeShutdownInstanceDisks(lu, instance):
2919
  """Shutdown block devices of an instance.
2920

2921
  This function checks if an instance is running, before calling
2922
  _ShutdownInstanceDisks.
2923

2924
  """
2925
  pnode = instance.primary_node
2926
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
2927
  ins_l.Raise("Can't contact node %s" % pnode)
2928

    
2929
  if instance.name in ins_l.payload:
2930
    raise errors.OpExecError("Instance is running, can't shutdown"
2931
                             " block devices.")
2932

    
2933
  _ShutdownInstanceDisks(lu, instance)
2934

    
2935

    
2936
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2937
  """Shutdown block devices of an instance.
2938

2939
  This does the shutdown on all nodes of the instance.
2940

2941
  If the ignore_primary is false, errors on the primary node are
2942
  ignored.
2943

2944
  """
2945
  all_result = True
2946
  for disk in instance.disks:
2947
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2948
      lu.cfg.SetDiskID(top_disk, node)
2949
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
2950
      msg = result.fail_msg
2951
      if msg:
2952
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
2953
                      disk.iv_name, node, msg)
2954
        if not ignore_primary or node != instance.primary_node:
2955
          all_result = False
2956
  return all_result
2957

    
2958

    
2959
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2960
  """Checks if a node has enough free memory.
2961

2962
  This function check if a given node has the needed amount of free
2963
  memory. In case the node has less memory or we cannot get the
2964
  information from the node, this function raise an OpPrereqError
2965
  exception.
2966

2967
  @type lu: C{LogicalUnit}
2968
  @param lu: a logical unit from which we get configuration data
2969
  @type node: C{str}
2970
  @param node: the node to check
2971
  @type reason: C{str}
2972
  @param reason: string to use in the error message
2973
  @type requested: C{int}
2974
  @param requested: the amount of memory in MiB to check for
2975
  @type hypervisor_name: C{str}
2976
  @param hypervisor_name: the hypervisor to ask for memory stats
2977
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2978
      we cannot check the node
2979

2980
  """
2981
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
2982
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
2983
  free_mem = nodeinfo[node].payload.get('memory_free', None)
2984
  if not isinstance(free_mem, int):
2985
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2986
                               " was '%s'" % (node, free_mem))
2987
  if requested > free_mem:
2988
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2989
                               " needed %s MiB, available %s MiB" %
2990
                               (node, reason, requested, free_mem))
2991

    
2992

    
2993
class LUStartupInstance(LogicalUnit):
2994
  """Starts an instance.
2995

2996
  """
2997
  HPATH = "instance-start"
2998
  HTYPE = constants.HTYPE_INSTANCE
2999
  _OP_REQP = ["instance_name", "force"]
3000
  REQ_BGL = False
3001

    
3002
  def ExpandNames(self):
3003
    self._ExpandAndLockInstance()
3004

    
3005
  def BuildHooksEnv(self):
3006
    """Build hooks env.
3007

3008
    This runs on master, primary and secondary nodes of the instance.
3009

3010
    """
3011
    env = {
3012
      "FORCE": self.op.force,
3013
      }
3014
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3015
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3016
    return env, nl, nl
3017

    
3018
  def CheckPrereq(self):
3019
    """Check prerequisites.
3020

3021
    This checks that the instance is in the cluster.
3022

3023
    """
3024
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3025
    assert self.instance is not None, \
3026
      "Cannot retrieve locked instance %s" % self.op.instance_name
3027

    
3028
    # extra beparams
3029
    self.beparams = getattr(self.op, "beparams", {})
3030
    if self.beparams:
3031
      if not isinstance(self.beparams, dict):
3032
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3033
                                   " dict" % (type(self.beparams), ))
3034
      # fill the beparams dict
3035
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3036
      self.op.beparams = self.beparams
3037

    
3038
    # extra hvparams
3039
    self.hvparams = getattr(self.op, "hvparams", {})
3040
    if self.hvparams:
3041
      if not isinstance(self.hvparams, dict):
3042
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3043
                                   " dict" % (type(self.hvparams), ))
3044

    
3045
      # check hypervisor parameter syntax (locally)
3046
      cluster = self.cfg.GetClusterInfo()
3047
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3048
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3049
                                    instance.hvparams)
3050
      filled_hvp.update(self.hvparams)
3051
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3052
      hv_type.CheckParameterSyntax(filled_hvp)
3053
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3054
      self.op.hvparams = self.hvparams
3055

    
3056
    _CheckNodeOnline(self, instance.primary_node)
3057

    
3058
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3059
    # check bridges existence
3060
    _CheckInstanceBridgesExist(self, instance)
3061

    
3062
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3063
                                              instance.name,
3064
                                              instance.hypervisor)
3065
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3066
                      prereq=True)
3067
    if not remote_info.payload: # not running already
3068
      _CheckNodeFreeMemory(self, instance.primary_node,
3069
                           "starting instance %s" % instance.name,
3070
                           bep[constants.BE_MEMORY], instance.hypervisor)
3071

    
3072
  def Exec(self, feedback_fn):
3073
    """Start the instance.
3074

3075
    """
3076
    instance = self.instance
3077
    force = self.op.force
3078

    
3079
    self.cfg.MarkInstanceUp(instance.name)
3080

    
3081
    node_current = instance.primary_node
3082

    
3083
    _StartInstanceDisks(self, instance, force)
3084

    
3085
    result = self.rpc.call_instance_start(node_current, instance,
3086
                                          self.hvparams, self.beparams)
3087
    msg = result.fail_msg
3088
    if msg:
3089
      _ShutdownInstanceDisks(self, instance)
3090
      raise errors.OpExecError("Could not start instance: %s" % msg)
3091

    
3092

    
3093
class LURebootInstance(LogicalUnit):
3094
  """Reboot an instance.
3095

3096
  """
3097
  HPATH = "instance-reboot"
3098
  HTYPE = constants.HTYPE_INSTANCE
3099
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3100
  REQ_BGL = False
3101

    
3102
  def ExpandNames(self):
3103
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3104
                                   constants.INSTANCE_REBOOT_HARD,
3105
                                   constants.INSTANCE_REBOOT_FULL]:
3106
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3107
                                  (constants.INSTANCE_REBOOT_SOFT,
3108
                                   constants.INSTANCE_REBOOT_HARD,
3109
                                   constants.INSTANCE_REBOOT_FULL))
3110
    self._ExpandAndLockInstance()
3111

    
3112
  def BuildHooksEnv(self):
3113
    """Build hooks env.
3114

3115
    This runs on master, primary and secondary nodes of the instance.
3116

3117
    """
3118
    env = {
3119
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3120
      "REBOOT_TYPE": self.op.reboot_type,
3121
      }
3122
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3123
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3124
    return env, nl, nl
3125

    
3126
  def CheckPrereq(self):
3127
    """Check prerequisites.
3128

3129
    This checks that the instance is in the cluster.
3130

3131
    """
3132
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3133
    assert self.instance is not None, \
3134
      "Cannot retrieve locked instance %s" % self.op.instance_name
3135

    
3136
    _CheckNodeOnline(self, instance.primary_node)
3137

    
3138
    # check bridges existence
3139
    _CheckInstanceBridgesExist(self, instance)
3140

    
3141
  def Exec(self, feedback_fn):
3142
    """Reboot the instance.
3143

3144
    """
3145
    instance = self.instance
3146
    ignore_secondaries = self.op.ignore_secondaries
3147
    reboot_type = self.op.reboot_type
3148

    
3149
    node_current = instance.primary_node
3150

    
3151
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3152
                       constants.INSTANCE_REBOOT_HARD]:
3153
      for disk in instance.disks:
3154
        self.cfg.SetDiskID(disk, node_current)
3155
      result = self.rpc.call_instance_reboot(node_current, instance,
3156
                                             reboot_type)
3157
      result.Raise("Could not reboot instance")
3158
    else:
3159
      result = self.rpc.call_instance_shutdown(node_current, instance)
3160
      result.Raise("Could not shutdown instance for full reboot")
3161
      _ShutdownInstanceDisks(self, instance)
3162
      _StartInstanceDisks(self, instance, ignore_secondaries)
3163
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3164
      msg = result.fail_msg
3165
      if msg:
3166
        _ShutdownInstanceDisks(self, instance)
3167
        raise errors.OpExecError("Could not start instance for"
3168
                                 " full reboot: %s" % msg)
3169

    
3170
    self.cfg.MarkInstanceUp(instance.name)
3171

    
3172

    
3173
class LUShutdownInstance(LogicalUnit):
3174
  """Shutdown an instance.
3175

3176
  """
3177
  HPATH = "instance-stop"
3178
  HTYPE = constants.HTYPE_INSTANCE
3179
  _OP_REQP = ["instance_name"]
3180
  REQ_BGL = False
3181

    
3182
  def ExpandNames(self):
3183
    self._ExpandAndLockInstance()
3184

    
3185
  def BuildHooksEnv(self):
3186
    """Build hooks env.
3187

3188
    This runs on master, primary and secondary nodes of the instance.
3189

3190
    """
3191
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3192
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3193
    return env, nl, nl
3194

    
3195
  def CheckPrereq(self):
3196
    """Check prerequisites.
3197

3198
    This checks that the instance is in the cluster.
3199

3200
    """
3201
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3202
    assert self.instance is not None, \
3203
      "Cannot retrieve locked instance %s" % self.op.instance_name
3204
    _CheckNodeOnline(self, self.instance.primary_node)
3205

    
3206
  def Exec(self, feedback_fn):
3207
    """Shutdown the instance.
3208

3209
    """
3210
    instance = self.instance
3211
    node_current = instance.primary_node
3212
    self.cfg.MarkInstanceDown(instance.name)
3213
    result = self.rpc.call_instance_shutdown(node_current, instance)
3214
    msg = result.fail_msg
3215
    if msg:
3216
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3217

    
3218
    _ShutdownInstanceDisks(self, instance)
3219

    
3220

    
3221
class LUReinstallInstance(LogicalUnit):
3222
  """Reinstall an instance.
3223

3224
  """
3225
  HPATH = "instance-reinstall"
3226
  HTYPE = constants.HTYPE_INSTANCE
3227
  _OP_REQP = ["instance_name"]
3228
  REQ_BGL = False
3229

    
3230
  def ExpandNames(self):
3231
    self._ExpandAndLockInstance()
3232

    
3233
  def BuildHooksEnv(self):
3234
    """Build hooks env.
3235

3236
    This runs on master, primary and secondary nodes of the instance.
3237

3238
    """
3239
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3240
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3241
    return env, nl, nl
3242

    
3243
  def CheckPrereq(self):
3244
    """Check prerequisites.
3245

3246
    This checks that the instance is in the cluster and is not running.
3247

3248
    """
3249
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3250
    assert instance is not None, \
3251
      "Cannot retrieve locked instance %s" % self.op.instance_name
3252
    _CheckNodeOnline(self, instance.primary_node)
3253

    
3254
    if instance.disk_template == constants.DT_DISKLESS:
3255
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3256
                                 self.op.instance_name)
3257
    if instance.admin_up:
3258
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3259
                                 self.op.instance_name)
3260
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3261
                                              instance.name,
3262
                                              instance.hypervisor)
3263
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3264
                      prereq=True)
3265
    if remote_info.payload:
3266
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3267
                                 (self.op.instance_name,
3268
                                  instance.primary_node))
3269

    
3270
    self.op.os_type = getattr(self.op, "os_type", None)
3271
    if self.op.os_type is not None:
3272
      # OS verification
3273
      pnode = self.cfg.GetNodeInfo(
3274
        self.cfg.ExpandNodeName(instance.primary_node))
3275
      if pnode is None:
3276
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3277
                                   self.op.pnode)
3278
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3279
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3280
                   (self.op.os_type, pnode.name), prereq=True)
3281

    
3282
    self.instance = instance
3283

    
3284
  def Exec(self, feedback_fn):
3285
    """Reinstall the instance.
3286

3287
    """
3288
    inst = self.instance
3289

    
3290
    if self.op.os_type is not None:
3291
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3292
      inst.os = self.op.os_type
3293
      self.cfg.Update(inst)
3294

    
3295
    _StartInstanceDisks(self, inst, None)
3296
    try:
3297
      feedback_fn("Running the instance OS create scripts...")
3298
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3299
      result.Raise("Could not install OS for instance %s on node %s" %
3300
                   (inst.name, inst.primary_node))
3301
    finally:
3302
      _ShutdownInstanceDisks(self, inst)
3303

    
3304

    
3305
class LURenameInstance(LogicalUnit):
3306
  """Rename an instance.
3307

3308
  """
3309
  HPATH = "instance-rename"
3310
  HTYPE = constants.HTYPE_INSTANCE
3311
  _OP_REQP = ["instance_name", "new_name"]
3312

    
3313
  def BuildHooksEnv(self):
3314
    """Build hooks env.
3315

3316
    This runs on master, primary and secondary nodes of the instance.
3317

3318
    """
3319
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3320
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3321
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3322
    return env, nl, nl
3323

    
3324
  def CheckPrereq(self):
3325
    """Check prerequisites.
3326

3327
    This checks that the instance is in the cluster and is not running.
3328

3329
    """
3330
    instance = self.cfg.GetInstanceInfo(
3331
      self.cfg.ExpandInstanceName(self.op.instance_name))
3332
    if instance is None:
3333
      raise errors.OpPrereqError("Instance '%s' not known" %
3334
                                 self.op.instance_name)
3335
    _CheckNodeOnline(self, instance.primary_node)
3336

    
3337
    if instance.admin_up:
3338
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3339
                                 self.op.instance_name)
3340
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3341
                                              instance.name,
3342
                                              instance.hypervisor)
3343
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3344
                      prereq=True)
3345
    if remote_info.payload:
3346
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3347
                                 (self.op.instance_name,
3348
                                  instance.primary_node))
3349
    self.instance = instance
3350

    
3351
    # new name verification
3352
    name_info = utils.HostInfo(self.op.new_name)
3353

    
3354
    self.op.new_name = new_name = name_info.name
3355
    instance_list = self.cfg.GetInstanceList()
3356
    if new_name in instance_list:
3357
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3358
                                 new_name)
3359

    
3360
    if not getattr(self.op, "ignore_ip", False):
3361
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3362
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3363
                                   (name_info.ip, new_name))
3364

    
3365

    
3366
  def Exec(self, feedback_fn):
3367
    """Reinstall the instance.
3368

3369
    """
3370
    inst = self.instance
3371
    old_name = inst.name
3372

    
3373
    if inst.disk_template == constants.DT_FILE:
3374
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3375

    
3376
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3377
    # Change the instance lock. This is definitely safe while we hold the BGL
3378
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3379
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3380

    
3381
    # re-read the instance from the configuration after rename
3382
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3383

    
3384
    if inst.disk_template == constants.DT_FILE:
3385
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3386
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3387
                                                     old_file_storage_dir,
3388
                                                     new_file_storage_dir)
3389
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3390
                   " (but the instance has been renamed in Ganeti)" %
3391
                   (inst.primary_node, old_file_storage_dir,
3392
                    new_file_storage_dir))
3393

    
3394
    _StartInstanceDisks(self, inst, None)
3395
    try:
3396
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3397
                                                 old_name)
3398
      msg = result.fail_msg
3399
      if msg:
3400
        msg = ("Could not run OS rename script for instance %s on node %s"
3401
               " (but the instance has been renamed in Ganeti): %s" %
3402
               (inst.name, inst.primary_node, msg))
3403
        self.proc.LogWarning(msg)
3404
    finally:
3405
      _ShutdownInstanceDisks(self, inst)
3406

    
3407

    
3408
class LURemoveInstance(LogicalUnit):
3409
  """Remove an instance.
3410

3411
  """
3412
  HPATH = "instance-remove"
3413
  HTYPE = constants.HTYPE_INSTANCE
3414
  _OP_REQP = ["instance_name", "ignore_failures"]
3415
  REQ_BGL = False
3416

    
3417
  def ExpandNames(self):
3418
    self._ExpandAndLockInstance()
3419
    self.needed_locks[locking.LEVEL_NODE] = []
3420
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3421

    
3422
  def DeclareLocks(self, level):
3423
    if level == locking.LEVEL_NODE:
3424
      self._LockInstancesNodes()
3425

    
3426
  def BuildHooksEnv(self):
3427
    """Build hooks env.
3428

3429
    This runs on master, primary and secondary nodes of the instance.
3430

3431
    """
3432
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3433
    nl = [self.cfg.GetMasterNode()]
3434
    return env, nl, nl
3435

    
3436
  def CheckPrereq(self):
3437
    """Check prerequisites.
3438

3439
    This checks that the instance is in the cluster.
3440

3441
    """
3442
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3443
    assert self.instance is not None, \
3444
      "Cannot retrieve locked instance %s" % self.op.instance_name
3445

    
3446
  def Exec(self, feedback_fn):
3447
    """Remove the instance.
3448

3449
    """
3450
    instance = self.instance
3451
    logging.info("Shutting down instance %s on node %s",
3452
                 instance.name, instance.primary_node)
3453

    
3454
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3455
    msg = result.fail_msg
3456
    if msg:
3457
      if self.op.ignore_failures:
3458
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3459
      else:
3460
        raise errors.OpExecError("Could not shutdown instance %s on"
3461
                                 " node %s: %s" %
3462
                                 (instance.name, instance.primary_node, msg))
3463

    
3464
    logging.info("Removing block devices for instance %s", instance.name)
3465

    
3466
    if not _RemoveDisks(self, instance):
3467
      if self.op.ignore_failures:
3468
        feedback_fn("Warning: can't remove instance's disks")
3469
      else:
3470
        raise errors.OpExecError("Can't remove instance's disks")
3471

    
3472
    logging.info("Removing instance %s out of cluster config", instance.name)
3473

    
3474
    self.cfg.RemoveInstance(instance.name)
3475
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3476

    
3477

    
3478
class LUQueryInstances(NoHooksLU):
3479
  """Logical unit for querying instances.
3480

3481
  """
3482
  _OP_REQP = ["output_fields", "names", "use_locking"]
3483
  REQ_BGL = False
3484
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3485
                                    "admin_state",
3486
                                    "disk_template", "ip", "mac", "bridge",
3487
                                    "nic_mode", "nic_link",
3488
                                    "sda_size", "sdb_size", "vcpus", "tags",
3489
                                    "network_port", "beparams",
3490
                                    r"(disk)\.(size)/([0-9]+)",
3491
                                    r"(disk)\.(sizes)", "disk_usage",
3492
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3493
                                    r"(nic)\.(bridge)/([0-9]+)",
3494
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
3495
                                    r"(disk|nic)\.(count)",
3496
                                    "serial_no", "hypervisor", "hvparams",] +
3497
                                  ["hv/%s" % name
3498
                                   for name in constants.HVS_PARAMETERS] +
3499
                                  ["be/%s" % name
3500
                                   for name in constants.BES_PARAMETERS])
3501
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3502

    
3503

    
3504
  def ExpandNames(self):
3505
    _CheckOutputFields(static=self._FIELDS_STATIC,
3506
                       dynamic=self._FIELDS_DYNAMIC,
3507
                       selected=self.op.output_fields)
3508

    
3509
    self.needed_locks = {}
3510
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3511
    self.share_locks[locking.LEVEL_NODE] = 1
3512

    
3513
    if self.op.names:
3514
      self.wanted = _GetWantedInstances(self, self.op.names)
3515
    else:
3516
      self.wanted = locking.ALL_SET
3517

    
3518
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3519
    self.do_locking = self.do_node_query and self.op.use_locking
3520
    if self.do_locking:
3521
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3522
      self.needed_locks[locking.LEVEL_NODE] = []
3523
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3524

    
3525
  def DeclareLocks(self, level):
3526
    if level == locking.LEVEL_NODE and self.do_locking:
3527
      self._LockInstancesNodes()
3528

    
3529
  def CheckPrereq(self):
3530
    """Check prerequisites.
3531

3532
    """
3533
    pass
3534

    
3535
  def Exec(self, feedback_fn):
3536
    """Computes the list of nodes and their attributes.
3537

3538
    """
3539
    all_info = self.cfg.GetAllInstancesInfo()
3540
    if self.wanted == locking.ALL_SET:
3541
      # caller didn't specify instance names, so ordering is not important
3542
      if self.do_locking:
3543
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3544
      else:
3545
        instance_names = all_info.keys()
3546
      instance_names = utils.NiceSort(instance_names)
3547
    else:
3548
      # caller did specify names, so we must keep the ordering
3549
      if self.do_locking:
3550
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3551
      else:
3552
        tgt_set = all_info.keys()
3553
      missing = set(self.wanted).difference(tgt_set)
3554
      if missing:
3555
        raise errors.OpExecError("Some instances were removed before"
3556
                                 " retrieving their data: %s" % missing)
3557
      instance_names = self.wanted
3558

    
3559
    instance_list = [all_info[iname] for iname in instance_names]
3560

    
3561
    # begin data gathering
3562

    
3563
    nodes = frozenset([inst.primary_node for inst in instance_list])
3564
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3565

    
3566
    bad_nodes = []
3567
    off_nodes = []
3568
    if self.do_node_query:
3569
      live_data = {}
3570
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3571
      for name in nodes:
3572
        result = node_data[name]
3573
        if result.offline:
3574
          # offline nodes will be in both lists
3575
          off_nodes.append(name)
3576
        if result.failed or result.fail_msg:
3577
          bad_nodes.append(name)
3578
        else:
3579
          if result.payload:
3580
            live_data.update(result.payload)
3581
          # else no instance is alive
3582
    else:
3583
      live_data = dict([(name, {}) for name in instance_names])
3584

    
3585
    # end data gathering
3586

    
3587
    HVPREFIX = "hv/"
3588
    BEPREFIX = "be/"
3589
    output = []
3590
    cluster = self.cfg.GetClusterInfo()
3591
    for instance in instance_list:
3592
      iout = []
3593
      i_hv = cluster.FillHV(instance)
3594
      i_be = cluster.FillBE(instance)
3595
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
3596
                                 nic.nicparams) for nic in instance.nics]
3597
      for field in self.op.output_fields:
3598
        st_match = self._FIELDS_STATIC.Matches(field)
3599
        if field == "name":
3600
          val = instance.name
3601
        elif field == "os":
3602
          val = instance.os
3603
        elif field == "pnode":
3604
          val = instance.primary_node
3605
        elif field == "snodes":
3606
          val = list(instance.secondary_nodes)
3607
        elif field == "admin_state":
3608
          val = instance.admin_up
3609
        elif field == "oper_state":
3610
          if instance.primary_node in bad_nodes:
3611
            val = None
3612
          else:
3613
            val = bool(live_data.get(instance.name))
3614
        elif field == "status":
3615
          if instance.primary_node in off_nodes:
3616
            val = "ERROR_nodeoffline"
3617
          elif instance.primary_node in bad_nodes:
3618
            val = "ERROR_nodedown"
3619
          else:
3620
            running = bool(live_data.get(instance.name))
3621
            if running:
3622
              if instance.admin_up:
3623
                val = "running"
3624
              else:
3625
                val = "ERROR_up"
3626
            else:
3627
              if instance.admin_up:
3628
                val = "ERROR_down"
3629
              else:
3630
                val = "ADMIN_down"
3631
        elif field == "oper_ram":
3632
          if instance.primary_node in bad_nodes:
3633
            val = None
3634
          elif instance.name in live_data:
3635
            val = live_data[instance.name].get("memory", "?")
3636
          else:
3637
            val = "-"
3638
        elif field == "vcpus":
3639
          val = i_be[constants.BE_VCPUS]
3640
        elif field == "disk_template":
3641
          val = instance.disk_template
3642
        elif field == "ip":
3643
          if instance.nics:
3644
            val = instance.nics[0].ip
3645
          else:
3646
            val = None
3647
        elif field == "nic_mode":
3648
          if instance.nics:
3649
            val = i_nicp[0][constants.NIC_MODE]
3650
          else:
3651
            val = None
3652
        elif field == "nic_link":
3653
          if instance.nics:
3654
            val = i_nicp[0][constants.NIC_LINK]
3655
          else:
3656
            val = None
3657
        elif field == "bridge":
3658
          if (instance.nics and
3659
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
3660
            val = i_nicp[0][constants.NIC_LINK]
3661
          else:
3662
            val = None
3663
        elif field == "mac":
3664
          if instance.nics:
3665
            val = instance.nics[0].mac
3666
          else:
3667
            val = None
3668
        elif field == "sda_size" or field == "sdb_size":
3669
          idx = ord(field[2]) - ord('a')
3670
          try:
3671
            val = instance.FindDisk(idx).size
3672
          except errors.OpPrereqError:
3673
            val = None
3674
        elif field == "disk_usage": # total disk usage per node
3675
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
3676
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3677
        elif field == "tags":
3678
          val = list(instance.GetTags())
3679
        elif field == "serial_no":
3680
          val = instance.serial_no
3681
        elif field == "network_port":
3682
          val = instance.network_port
3683
        elif field == "hypervisor":
3684
          val = instance.hypervisor
3685
        elif field == "hvparams":
3686
          val = i_hv
3687
        elif (field.startswith(HVPREFIX) and
3688
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3689
          val = i_hv.get(field[len(HVPREFIX):], None)
3690
        elif field == "beparams":
3691
          val = i_be
3692
        elif (field.startswith(BEPREFIX) and
3693
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3694
          val = i_be.get(field[len(BEPREFIX):], None)
3695
        elif st_match and st_match.groups():
3696
          # matches a variable list
3697
          st_groups = st_match.groups()
3698
          if st_groups and st_groups[0] == "disk":
3699
            if st_groups[1] == "count":
3700
              val = len(instance.disks)
3701
            elif st_groups[1] == "sizes":
3702
              val = [disk.size for disk in instance.disks]
3703
            elif st_groups[1] == "size":
3704
              try:
3705
                val = instance.FindDisk(st_groups[2]).size
3706
              except errors.OpPrereqError:
3707
                val = None
3708
            else:
3709
              assert False, "Unhandled disk parameter"
3710
          elif st_groups[0] == "nic":
3711
            if st_groups[1] == "count":
3712
              val = len(instance.nics)
3713
            elif st_groups[1] == "macs":
3714
              val = [nic.mac for nic in instance.nics]
3715
            elif st_groups[1] == "ips":
3716
              val = [nic.ip for nic in instance.nics]
3717
            elif st_groups[1] == "modes":
3718
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
3719
            elif st_groups[1] == "links":
3720
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
3721
            elif st_groups[1] == "bridges":
3722
              val = []
3723
              for nicp in i_nicp:
3724
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3725
                  val.append(nicp[constants.NIC_LINK])
3726
                else:
3727
                  val.append(None)
3728
            else:
3729
              # index-based item
3730
              nic_idx = int(st_groups[2])
3731
              if nic_idx >= len(instance.nics):
3732
                val = None
3733
              else:
3734
                if st_groups[1] == "mac":
3735
                  val = instance.nics[nic_idx].mac
3736
                elif st_groups[1] == "ip":
3737
                  val = instance.nics[nic_idx].ip
3738
                elif st_groups[1] == "mode":
3739
                  val = i_nicp[nic_idx][constants.NIC_MODE]
3740
                elif st_groups[1] == "link":
3741
                  val = i_nicp[nic_idx][constants.NIC_LINK]
3742
                elif st_groups[1] == "bridge":
3743
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
3744
                  if nic_mode == constants.NIC_MODE_BRIDGED:
3745
                    val = i_nicp[nic_idx][constants.NIC_LINK]
3746
                  else:
3747
                    val = None
3748
                else:
3749
                  assert False, "Unhandled NIC parameter"
3750
          else:
3751
            assert False, ("Declared but unhandled variable parameter '%s'" %
3752
                           field)
3753
        else:
3754
          assert False, "Declared but unhandled parameter '%s'" % field
3755
        iout.append(val)
3756
      output.append(iout)
3757

    
3758
    return output
3759

    
3760

    
3761
class LUFailoverInstance(LogicalUnit):
3762
  """Failover an instance.
3763

3764
  """
3765
  HPATH = "instance-failover"
3766
  HTYPE = constants.HTYPE_INSTANCE
3767
  _OP_REQP = ["instance_name", "ignore_consistency"]
3768
  REQ_BGL = False
3769

    
3770
  def ExpandNames(self):
3771
    self._ExpandAndLockInstance()
3772
    self.needed_locks[locking.LEVEL_NODE] = []
3773
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3774

    
3775
  def DeclareLocks(self, level):
3776
    if level == locking.LEVEL_NODE:
3777
      self._LockInstancesNodes()
3778

    
3779
  def BuildHooksEnv(self):
3780
    """Build hooks env.
3781

3782
    This runs on master, primary and secondary nodes of the instance.
3783

3784
    """
3785
    env = {
3786
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3787
      }
3788
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3789
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3790
    return env, nl, nl
3791

    
3792
  def CheckPrereq(self):
3793
    """Check prerequisites.
3794

3795
    This checks that the instance is in the cluster.
3796

3797
    """
3798
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3799
    assert self.instance is not None, \
3800
      "Cannot retrieve locked instance %s" % self.op.instance_name
3801

    
3802
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3803
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3804
      raise errors.OpPrereqError("Instance's disk layout is not"
3805
                                 " network mirrored, cannot failover.")
3806

    
3807
    secondary_nodes = instance.secondary_nodes
3808
    if not secondary_nodes:
3809
      raise errors.ProgrammerError("no secondary node but using "
3810
                                   "a mirrored disk template")
3811

    
3812
    target_node = secondary_nodes[0]
3813
    _CheckNodeOnline(self, target_node)
3814
    _CheckNodeNotDrained(self, target_node)
3815
    if instance.admin_up:
3816
      # check memory requirements on the secondary node
3817
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
3818
                           instance.name, bep[constants.BE_MEMORY],
3819
                           instance.hypervisor)
3820
    else:
3821
      self.LogInfo("Not checking memory on the secondary node as"
3822
                   " instance will not be started")
3823

    
3824
    # check bridge existance
3825
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3826

    
3827
  def Exec(self, feedback_fn):
3828
    """Failover an instance.
3829

3830
    The failover is done by shutting it down on its present node and
3831
    starting it on the secondary.
3832

3833
    """
3834
    instance = self.instance
3835

    
3836
    source_node = instance.primary_node
3837
    target_node = instance.secondary_nodes[0]
3838

    
3839
    feedback_fn("* checking disk consistency between source and target")
3840
    for dev in instance.disks:
3841
      # for drbd, these are drbd over lvm
3842
      if not _CheckDiskConsistency(self, dev, target_node, False):
3843
        if instance.admin_up and not self.op.ignore_consistency:
3844
          raise errors.OpExecError("Disk %s is degraded on target node,"
3845
                                   " aborting failover." % dev.iv_name)
3846

    
3847
    feedback_fn("* shutting down instance on source node")
3848
    logging.info("Shutting down instance %s on node %s",
3849
                 instance.name, source_node)
3850

    
3851
    result = self.rpc.call_instance_shutdown(source_node, instance)
3852
    msg = result.fail_msg
3853
    if msg:
3854
      if self.op.ignore_consistency:
3855
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
3856
                             " Proceeding anyway. Please make sure node"
3857
                             " %s is down. Error details: %s",
3858
                             instance.name, source_node, source_node, msg)
3859
      else:
3860
        raise errors.OpExecError("Could not shutdown instance %s on"
3861
                                 " node %s: %s" %
3862
                                 (instance.name, source_node, msg))
3863

    
3864
    feedback_fn("* deactivating the instance's disks on source node")
3865
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
3866
      raise errors.OpExecError("Can't shut down the instance's disks.")
3867

    
3868
    instance.primary_node = target_node
3869
    # distribute new instance config to the other nodes
3870
    self.cfg.Update(instance)
3871

    
3872
    # Only start the instance if it's marked as up
3873
    if instance.admin_up:
3874
      feedback_fn("* activating the instance's disks on target node")
3875
      logging.info("Starting instance %s on node %s",
3876
                   instance.name, target_node)
3877

    
3878
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
3879
                                               ignore_secondaries=True)
3880
      if not disks_ok:
3881
        _ShutdownInstanceDisks(self, instance)
3882
        raise errors.OpExecError("Can't activate the instance's disks")
3883

    
3884
      feedback_fn("* starting the instance on the target node")
3885
      result = self.rpc.call_instance_start(target_node, instance, None, None)
3886
      msg = result.fail_msg
3887
      if msg:
3888
        _ShutdownInstanceDisks(self, instance)
3889
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
3890
                                 (instance.name, target_node, msg))
3891

    
3892

    
3893
class LUMigrateInstance(LogicalUnit):
3894
  """Migrate an instance.
3895

3896
  This is migration without shutting down, compared to the failover,
3897
  which is done with shutdown.
3898

3899
  """
3900
  HPATH = "instance-migrate"
3901
  HTYPE = constants.HTYPE_INSTANCE
3902
  _OP_REQP = ["instance_name", "live", "cleanup"]
3903

    
3904
  REQ_BGL = False
3905

    
3906
  def ExpandNames(self):
3907
    self._ExpandAndLockInstance()
3908

    
3909
    self.needed_locks[locking.LEVEL_NODE] = []
3910
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3911

    
3912
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
3913
                                       self.op.live, self.op.cleanup)
3914
    self.tasklets.append(self._migrater)
3915

    
3916
  def DeclareLocks(self, level):
3917
    if level == locking.LEVEL_NODE:
3918
      self._LockInstancesNodes()
3919

    
3920
  def BuildHooksEnv(self):
3921
    """Build hooks env.
3922

3923
    This runs on master, primary and secondary nodes of the instance.
3924

3925
    """
3926
    instance = self._migrater.instance
3927
    env = _BuildInstanceHookEnvByObject(self, instance)
3928
    env["MIGRATE_LIVE"] = self.op.live
3929
    env["MIGRATE_CLEANUP"] = self.op.cleanup
3930
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
3931
    return env, nl, nl
3932

    
3933

    
3934
class TLMigrateInstance(Tasklet):
3935
  def __init__(self, lu, instance_name, live, cleanup):
3936
    """Initializes this class.
3937

3938
    """
3939
    # Parameters
3940
    self.lu = lu
3941
    self.instance_name = instance_name
3942
    self.live = live
3943
    self.cleanup = cleanup
3944

    
3945
    # Shortcuts
3946
    self.cfg = lu.cfg
3947
    self.rpc = lu.rpc
3948

    
3949
  def CheckPrereq(self):
3950
    """Check prerequisites.
3951

3952
    This checks that the instance is in the cluster.
3953

3954
    """
3955
    instance = self.cfg.GetInstanceInfo(
3956
      self.cfg.ExpandInstanceName(self.instance_name))
3957
    if instance is None:
3958
      raise errors.OpPrereqError("Instance '%s' not known" %
3959
                                 self.instance_name)
3960

    
3961
    if instance.disk_template != constants.DT_DRBD8:
3962
      raise errors.OpPrereqError("Instance's disk layout is not"
3963
                                 " drbd8, cannot migrate.")
3964

    
3965
    secondary_nodes = instance.secondary_nodes
3966
    if not secondary_nodes:
3967
      raise errors.ConfigurationError("No secondary node but using"
3968
                                      " drbd8 disk template")
3969

    
3970
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
3971

    
3972
    target_node = secondary_nodes[0]
3973
    # check memory requirements on the secondary node
3974
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
3975
                         instance.name, i_be[constants.BE_MEMORY],
3976
                         instance.hypervisor)
3977

    
3978
    # check bridge existance
3979
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3980

    
3981
    if not self.cleanup:
3982
      _CheckNodeNotDrained(self, target_node)
3983
      result = self.rpc.call_instance_migratable(instance.primary_node,
3984
                                                 instance)
3985
      result.Raise("Can't migrate, please use failover", prereq=True)
3986

    
3987
    self.instance = instance
3988

    
3989
  def _WaitUntilSync(self):
3990
    """Poll with custom rpc for disk sync.
3991

3992
    This uses our own step-based rpc call.
3993

3994
    """
3995
    self.feedback_fn("* wait until resync is done")
3996
    all_done = False
3997
    while not all_done:
3998
      all_done = True
3999
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4000
                                            self.nodes_ip,
4001
                                            self.instance.disks)
4002
      min_percent = 100
4003
      for node, nres in result.items():
4004
        nres.Raise("Cannot resync disks on node %s" % node)
4005
        node_done, node_percent = nres.payload
4006
        all_done = all_done and node_done
4007
        if node_percent is not None:
4008
          min_percent = min(min_percent, node_percent)
4009
      if not all_done:
4010
        if min_percent < 100:
4011
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
4012
        time.sleep(2)
4013

    
4014
  def _EnsureSecondary(self, node):
4015
    """Demote a node to secondary.
4016

4017
    """
4018
    self.feedback_fn("* switching node %s to secondary mode" % node)
4019

    
4020
    for dev in self.instance.disks:
4021
      self.cfg.SetDiskID(dev, node)
4022

    
4023
    result = self.rpc.call_blockdev_close(node, self.instance.name,
4024
                                          self.instance.disks)
4025
    result.Raise("Cannot change disk to secondary on node %s" % node)
4026

    
4027
  def _GoStandalone(self):
4028
    """Disconnect from the network.
4029

4030
    """
4031
    self.feedback_fn("* changing into standalone mode")
4032
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4033
                                               self.instance.disks)
4034
    for node, nres in result.items():
4035
      nres.Raise("Cannot disconnect disks node %s" % node)
4036

    
4037
  def _GoReconnect(self, multimaster):
4038
    """Reconnect to the network.
4039

4040
    """
4041
    if multimaster:
4042
      msg = "dual-master"
4043
    else:
4044
      msg = "single-master"
4045
    self.feedback_fn("* changing disks into %s mode" % msg)
4046
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4047
                                           self.instance.disks,
4048
                                           self.instance.name, multimaster)
4049
    for node, nres in result.items():
4050
      nres.Raise("Cannot change disks config on node %s" % node)
4051

    
4052
  def _ExecCleanup(self):
4053
    """Try to cleanup after a failed migration.
4054

4055
    The cleanup is done by:
4056
      - check that the instance is running only on one node
4057
        (and update the config if needed)
4058
      - change disks on its secondary node to secondary
4059
      - wait until disks are fully synchronized
4060
      - disconnect from the network
4061
      - change disks into single-master mode
4062
      - wait again until disks are fully synchronized
4063

4064
    """
4065
    instance = self.instance
4066
    target_node = self.target_node
4067
    source_node = self.source_node
4068

    
4069
    # check running on only one node
4070
    self.feedback_fn("* checking where the instance actually runs"
4071
                     " (if this hangs, the hypervisor might be in"
4072
                     " a bad state)")
4073
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4074
    for node, result in ins_l.items():
4075
      result.Raise("Can't contact node %s" % node)
4076

    
4077
    runningon_source = instance.name in ins_l[source_node].payload
4078
    runningon_target = instance.name in ins_l[target_node].payload
4079

    
4080
    if runningon_source and runningon_target:
4081
      raise errors.OpExecError("Instance seems to be running on two nodes,"
4082
                               " or the hypervisor is confused. You will have"
4083
                               " to ensure manually that it runs only on one"
4084
                               " and restart this operation.")
4085

    
4086
    if not (runningon_source or runningon_target):
4087
      raise errors.OpExecError("Instance does not seem to be running at all."
4088
                               " In this case, it's safer to repair by"
4089
                               " running 'gnt-instance stop' to ensure disk"
4090
                               " shutdown, and then restarting it.")
4091

    
4092
    if runningon_target:
4093
      # the migration has actually succeeded, we need to update the config
4094
      self.feedback_fn("* instance running on secondary node (%s),"
4095
                       " updating config" % target_node)
4096
      instance.primary_node = target_node
4097
      self.cfg.Update(instance)
4098
      demoted_node = source_node
4099
    else:
4100
      self.feedback_fn("* instance confirmed to be running on its"
4101
                       " primary node (%s)" % source_node)
4102
      demoted_node = target_node
4103

    
4104
    self._EnsureSecondary(demoted_node)
4105
    try:
4106
      self._WaitUntilSync()
4107
    except errors.OpExecError:
4108
      # we ignore here errors, since if the device is standalone, it
4109
      # won't be able to sync
4110
      pass
4111
    self._GoStandalone()
4112
    self._GoReconnect(False)
4113
    self._WaitUntilSync()
4114

    
4115
    self.feedback_fn("* done")
4116

    
4117
  def _RevertDiskStatus(self):
4118
    """Try to revert the disk status after a failed migration.
4119

4120
    """
4121
    target_node = self.target_node
4122
    try:
4123
      self._EnsureSecondary(target_node)
4124
      self._GoStandalone()
4125
      self._GoReconnect(False)
4126
      self._WaitUntilSync()
4127
    except errors.OpExecError, err:
4128
      self.lu.LogWarning("Migration failed and I can't reconnect the"
4129
                         " drives: error '%s'\n"
4130
                         "Please look and recover the instance status" %
4131
                         str(err))
4132

    
4133
  def _AbortMigration(self):
4134
    """Call the hypervisor code to abort a started migration.
4135

4136
    """
4137
    instance = self.instance
4138
    target_node = self.target_node
4139
    migration_info = self.migration_info
4140

    
4141
    abort_result = self.rpc.call_finalize_migration(target_node,
4142
                                                    instance,
4143
                                                    migration_info,
4144
                                                    False)
4145
    abort_msg = abort_result.fail_msg
4146
    if abort_msg:
4147
      logging.error("Aborting migration failed on target node %s: %s" %
4148
                    (target_node, abort_msg))
4149
      # Don't raise an exception here, as we stil have to try to revert the
4150
      # disk status, even if this step failed.
4151

    
4152
  def _ExecMigration(self):
4153
    """Migrate an instance.
4154

4155
    The migrate is done by:
4156
      - change the disks into dual-master mode
4157
      - wait until disks are fully synchronized again
4158
      - migrate the instance
4159
      - change disks on the new secondary node (the old primary) to secondary
4160
      - wait until disks are fully synchronized
4161
      - change disks into single-master mode
4162

4163
    """
4164
    instance = self.instance
4165
    target_node = self.target_node
4166
    source_node = self.source_node
4167

    
4168
    self.feedback_fn("* checking disk consistency between source and target")
4169
    for dev in instance.disks:
4170
      if not _CheckDiskConsistency(self, dev, target_node, False):
4171
        raise errors.OpExecError("Disk %s is degraded or not fully"
4172
                                 " synchronized on target node,"
4173
                                 " aborting migrate." % dev.iv_name)
4174

    
4175
    # First get the migration information from the remote node
4176
    result = self.rpc.call_migration_info(source_node, instance)
4177
    msg = result.fail_msg
4178
    if msg:
4179
      log_err = ("Failed fetching source migration information from %s: %s" %
4180
                 (source_node, msg))
4181
      logging.error(log_err)
4182
      raise errors.OpExecError(log_err)
4183

    
4184
    self.migration_info = migration_info = result.payload
4185

    
4186
    # Then switch the disks to master/master mode
4187
    self._EnsureSecondary(target_node)
4188
    self._GoStandalone()
4189
    self._GoReconnect(True)
4190
    self._WaitUntilSync()
4191

    
4192
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4193
    result = self.rpc.call_accept_instance(target_node,
4194
                                           instance,
4195
                                           migration_info,
4196
                                           self.nodes_ip[target_node])
4197

    
4198
    msg = result.fail_msg
4199
    if msg:
4200
      logging.error("Instance pre-migration failed, trying to revert"
4201
                    " disk status: %s", msg)
4202
      self._AbortMigration()
4203
      self._RevertDiskStatus()
4204
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4205
                               (instance.name, msg))
4206

    
4207
    self.feedback_fn("* migrating instance to %s" % target_node)
4208
    time.sleep(10)
4209
    result = self.rpc.call_instance_migrate(source_node, instance,
4210
                                            self.nodes_ip[target_node],
4211
                                            self.live)
4212
    msg = result.fail_msg
4213
    if msg:
4214
      logging.error("Instance migration failed, trying to revert"
4215
                    " disk status: %s", msg)
4216
      self._AbortMigration()
4217
      self._RevertDiskStatus()
4218
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4219
                               (instance.name, msg))
4220
    time.sleep(10)
4221

    
4222
    instance.primary_node = target_node
4223
    # distribute new instance config to the other nodes
4224
    self.cfg.Update(instance)
4225

    
4226
    result = self.rpc.call_finalize_migration(target_node,
4227
                                              instance,
4228
                                              migration_info,
4229
                                              True)
4230
    msg = result.fail_msg
4231
    if msg:
4232
      logging.error("Instance migration succeeded, but finalization failed:"
4233
                    " %s" % msg)
4234
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4235
                               msg)
4236

    
4237
    self._EnsureSecondary(source_node)
4238
    self._WaitUntilSync()
4239
    self._GoStandalone()
4240
    self._GoReconnect(False)
4241
    self._WaitUntilSync()
4242

    
4243
    self.feedback_fn("* done")
4244

    
4245
  def Exec(self, feedback_fn):
4246
    """Perform the migration.
4247

4248
    """
4249
    self.feedback_fn = feedback_fn
4250

    
4251
    self.source_node = self.instance.primary_node
4252
    self.target_node = self.instance.secondary_nodes[0]
4253
    self.all_nodes = [self.source_node, self.target_node]
4254
    self.nodes_ip = {
4255
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4256
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4257
      }
4258

    
4259
    if self.cleanup:
4260
      return self._ExecCleanup()
4261
    else:
4262
      return self._ExecMigration()
4263

    
4264

    
4265
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate logical volume names for the given instance,
  one per requested extension.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
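  # a DRBD8 device is backed by two LVs: a data volume of the requested
  # size and a fixed 128 MB metadata volume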
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
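    # allocate one DRBD minor per disk on each of the two nodes; the
    # resulting list is consumed pairwise (primary, secondary) below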
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
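    # on the primary node both creation and opening of the device are
    # forced; on secondary nodes creation only happens for device types
    # that require it (cf. _CreateBlockDev and Disk.CreateOnSecondary)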
    for node in instance.all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    msg = result.fail_msg
    if msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

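  # for example, two 10240 MB disks under DT_DRBD8 need
  # 2 * (10240 + 128) = 20736 MB of free space in the volume group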
  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
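        # 'bridge' is the legacy parameter for bridged NICs; treat it as
        # the NIC's link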
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

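      # if the instance keeps its original name, re-use the MAC addresses
      # recorded in the export for any NICs left on 'auto'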
      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
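    # some hypervisor types need a cluster-wide unique TCP port reserved
    # for the instance (e.g. for its graphical console)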
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets.append(self.replacer)

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      self.tasklets.append(replacer)

    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Initializes this class.

    """
    # Parameters
    self.lu = lu
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    cnt = [remote_node, iallocator].count(None)
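    # when changing the secondary exactly one of remote_node and iallocator
    # must be given; in the other modes neither may be used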
    if mode == constants.REPLACE_DISK_CHG:
      if cnt == 2:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")
      elif cnt == 0:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
    else: # not replacing the secondary
      if cnt != 2:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " be used only when changing the"
                                   " secondary node")

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info))

    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if self.instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(self.instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(self.instance.secondary_nodes))

    secondary_node = self.instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       self.instance.name, secondary_node)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.mode == constants.REPLACE_DISK_PRI:
      self.target_node = self.instance.primary_node
      self.other_node = secondary_node
      check_nodes = [self.target_node, self.other_node]

    elif self.mode == constants.REPLACE_DISK_SEC:
      self.target_node = secondary_node
      self.other_node = self.instance.primary_node
      check_nodes = [self.target_node, self.other_node]

    elif self.mode == constants.REPLACE_DISK_CHG:
      self.new_node = remote_node
      self.other_node = self.instance.primary_node
      self.target_node = secondary_node
      check_nodes = [self.new_node, self.other_node]

      _CheckNodeNotDrained(self.lu, remote_node)

    else:
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                   self.mode)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    # Check whether disks are valid
    for disk_idx in self.disks:
      self.instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    feedback_fn("Replacing disks for %s" % self.instance.name)

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      if self.mode == constants.REPLACE_DISK_CHG:
        return self._ExecDrbd8Secondary()
      else:
        return self._ExecDrbd8DiskOnly()

    finally:
      # Deactivate the instance disks if we're replacing them on a down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

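      # the replacement LVs get fresh unique names; they are renamed to the
      # old LV names before being re-attached to the DRBD device (see the
      # algorithm described in _ExecDrbd8DiskOnly)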
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
5622
      names = _GenerateUniqueNames(self.lu, lv_names)
5623

    
5624
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
5625
                             logical_id=(vgname, names[0]))
5626
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5627
                             logical_id=(vgname, names[1]))
5628

    
5629
      new_lvs = [lv_data, lv_meta]
5630
      old_lvs = dev.children
5631
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5632

    
5633
      # we pass force_create=True to force the LVM creation
5634
      for new_lv in new_lvs:
5635
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
5636
                        _GetInstanceInfoText(self.instance), False)
5637

    
5638
    return iv_names
5639

    
5640
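  # Illustrative example (hypothetical names): for disk/0 of size 10240 MiB
  # the two LVs requested above are named roughly
  #   <uuid>.disk0_data  (size 10240, the data volume)
  #   <uuid>.disk0_meta  (size 128, the DRBD metadata volume)
  # and iv_names maps "disk/0" -> (drbd_dev, old_lvs, [lv_data, lv_meta]).
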
  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload[5]:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

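  # The check above relies on an assumption made elsewhere in this file as
  # well: the payload returned by call_blockdev_find is the block device
  # status tuple in which index 5 is the is_degraded flag; nothing else of
  # the payload is inspected here.
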
  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (dev, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
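    # A minimal sketch of the renaming convention used in step 4 below
    # (hypothetical names, for illustration only):
    #   ren_fn(old_lv, 1234567890)
    #     ("xenvg", "<uuid>.disk0_data")
    #       -> ("xenvg", "<uuid>.disk0_data_replaced-1234567890")
    # so the old LVs are parked under a "_replaced-<time_t>" name while the
    # freshly created LVs take over the old names and are re-attached to the
    # DRBD device; step 6 then removes the parked LVs.
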
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
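    # Sketch of the DRBD logical_id layout manipulated below (hypothetical
    # values): a DRBD8 disk carries a tuple
    #   (nodeA, nodeB, port, minorA, minorB, secret)
    # e.g. ("node1.example.com", "node3.example.com", 11000, 0, 3, "secret").
    # Step 4 builds two variants per disk: new_alone_id with port=None, so
    # the device can be brought up on the new secondary without networking,
    # and new_net_id with the real port, which becomes the disk's logical_id
    # once the primary re-attaches to the new secondary.
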
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r" % (minors,))

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net(
      [self.instance.primary_node], self.node_secondary_ip,
      self.instance.disks)[self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)


    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node)
      vg_free = info.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node, vg_free, self.op.amount))

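  # Informal usage sketch (assuming the matching opcode is opcodes.OpGrowDisk,
  # as submitted by "gnt-instance grow-disk"): the required parameters line up
  # with _OP_REQP above, e.g.
  #   OpGrowDisk(instance_name="inst1.example.com", disk=0,
  #              amount=2048, wait_for_sync=True)
  # grows disk 0 by 2048 MiB, provided every node of the instance reports at
  # least that much vg_free in CheckPrereq.
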
  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    static = self.op.static
    if not static:
      self.cfg.SetDiskID(dev, instance.primary_node)
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
      if dev_pstatus.offline:
        dev_pstatus = None
      else:
        dev_pstatus.Raise("Can't compute disk status for %s" % instance.name)
        dev_pstatus = dev_pstatus.payload
    else:
      dev_pstatus = None

    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    if snode and not static:
      self.cfg.SetDiskID(dev, snode)
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
      if dev_sstatus.offline:
        dev_sstatus = None
      else:
        dev_sstatus.Raise("Can't compute disk status for %s" % instance.name)
        dev_sstatus = dev_sstatus.payload
    else:
      dev_sstatus = None

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or
            self.op.hvparams or self.op.beparams):
      raise errors.OpPrereqError("No changes submitted")

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index")
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing")
        try:
          size = int(size)
        except ValueError, err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err))
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk")

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time")

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index")
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          if not utils.IsValidMac(nic_mac):
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic")

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time")

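  # Sketch of the opcode format validated above (hypothetical values):
  # self.op.disks and self.op.nics are lists of (operation, dict) pairs,
  # where the operation is constants.DDM_ADD, constants.DDM_REMOVE or the
  # integer index of an existing disk/NIC, e.g.
  #   op.disks = [(constants.DDM_ADD, {'size': 2048, 'mode': 'rw'})]
  #   op.nics  = [(0, {'ip': '192.0.2.10'})]
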
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

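  # The NIC override built above is a list of (ip, mac, mode, link) tuples,
  # one per NIC after the requested changes, e.g. (hypothetical values)
  # ("192.0.2.10", "aa:00:00:12:34:56", "bridged", "xen-br0"); a DDM_REMOVE
  # simply drops the last tuple again.
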
  def _GetUpdatedParams(self, old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)

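  # Worked example for _GetUpdatedParams (hypothetical values):
  #   old_params     = {'memory': 512, 'vcpus': 2}
  #   update_dict    = {'memory': 1024, 'vcpus': constants.VALUE_DEFAULT}
  #   default_values = {'memory': 128, 'vcpus': 1, 'auto_balance': True}
  # returns roughly
  #   new_parameters    = {'memory': 1024}
  #   filled_parameters = {'memory': 1024, 'vcpus': 1, 'auto_balance': True}
  # i.e. VALUE_DEFAULT drops the per-instance override and the filled dict
  # is the defaults overlaid with whatever remains.
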
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics)))
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic')
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None')
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC()
        else:
          # or validate/reserve the current one
          if self.cfg.IsMacInUse(nic_mac):
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances")
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance")
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg))
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)))

    return

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    cluster = self.cluster
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pnew:
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance)

    return result


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
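    # Sketch of the returned structure (hypothetical names): nodes that
    # answered map to the list of exports they hold, nodes whose RPC failed
    # map to False, e.g.
    #   {"node1.example.com": ["inst1.example.com"],
    #    "node2.example.com": False}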
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    try:
      for idx, disk in enumerate(instance.disks):
        # result.payload will be a snapshot of an lvm leaf of the one we passed
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                          idx, src_node, msg)
          snap_disks.append(False)
        else:
          disk_id = (vgname, result.payload)
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=disk_id, physical_id=disk_id,
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      if self.op.shutdown and instance.admin_up:
        result = self.rpc.call_instance_start(src_node, instance, None, None)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)

    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


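# Note on the TagsLU contract: the concrete tag LUs that inherit from it
# (LUGetTags below, plus LUAddTags and LUDelTags further down) rely on
# TagsLU.CheckPrereq having resolved self.target to the cluster, node or
# instance object named by the opcode; their own Exec methods then only deal
# with the tag operation itself.
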
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


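# Illustrative example of LUSearchTags.Exec above (names and tags are
# hypothetical): with the pattern "^web", an instance "inst1.example.com"
# tagged "webserver" and a node "node1.example.com" tagged "webswitch" would
# yield
#   [("/instances/inst1.example.com", "webserver"),
#    ("/nodes/node1.example.com", "webswitch")]
# i.e. a list of (path, matching tag) pairs collected over the cluster, all
# instances and all nodes, in that order.
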
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


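# Worked example for the subset check in LUDelTags.CheckPrereq above
# (hypothetical tags): if the target currently carries {"a", "b"} and the
# opcode asks to delete {"a", "c"}, then del_tags <= cur_tags is false,
# diff_tags is {"c"} and the operation aborts with "Tag(s) 'c' not found"
# before any tag is removed.
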
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


7129
  """IAllocator framework.
7130

7131
  An IAllocator instance has three sets of attributes:
7132
    - cfg that is needed to query the cluster
7133
    - input data (all members of the _KEYS class attribute are required)
7134
    - four buffer attributes (in|out_data|text), that represent the
7135
      input (to the external script) in text and data structure format,
7136
      and the output from it, again in two formats
7137
    - the result variables from the script (success, info, nodes) for
7138
      easy usage
7139

7140
  """
7141
  _ALLO_KEYS = [
7142
    "mem_size", "disks", "disk_template",
7143
    "os", "tags", "nics", "vcpus", "hypervisor",
7144
    ]
7145
  _RELO_KEYS = [
7146
    "relocate_from",
7147
    ]
7148

    
7149
  def __init__(self, cfg, rpc, mode, name, **kwargs):
7150
    self.cfg = cfg
7151
    self.rpc = rpc
7152
    # init buffer variables
7153
    self.in_text = self.out_text = self.in_data = self.out_data = None
7154
    # init all input fields so that pylint is happy
7155
    self.mode = mode
7156
    self.name = name
7157
    self.mem_size = self.disks = self.disk_template = None
7158
    self.os = self.tags = self.nics = self.vcpus = None
7159
    self.hypervisor = None
7160
    self.relocate_from = None
7161
    # computed fields
7162
    self.required_nodes = None
7163
    # init result fields
7164
    self.success = self.info = self.nodes = None
7165
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
7166
      keyset = self._ALLO_KEYS
7167
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
7168
      keyset = self._RELO_KEYS
7169
    else:
7170
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
7171
                                   " IAllocator" % self.mode)
7172
    for key in kwargs:
7173
      if key not in keyset:
7174
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
7175
                                     " IAllocator" % key)
7176
      setattr(self, key, kwargs[key])
7177
    for key in keyset:
7178
      if key not in kwargs:
7179
        raise errors.ProgrammerError("Missing input parameter '%s' to"
7180
                                     " IAllocator" % key)
7181
    self._BuildInputData()
7182

    
7183
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

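  # Worked example (hypothetical numbers) for the free-memory adjustment in
  # _ComputeClusterData above: if an instance on a node is configured with
  # BE_MEMORY = 512 MiB but the hypervisor reports it currently using only
  # 300 MiB, then i_mem_diff = 512 - 300 = 212 and the node's reported
  # 'memory_free' is lowered by a further 212 MiB, so the allocator sees the
  # full reservation rather than the momentary usage.  Instances not reported
  # as running (i_used_mem = 0) reduce 'memory_free' by their whole BE_MEMORY.
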
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

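  # Typical calling pattern for Run above (a sketch only; see
  # LUTestAllocator.Exec further down for a concrete in-tree example -- the
  # allocator name "dummy-allocator" and the parameter values here are
  # illustrative):
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("dummy-allocator")
  #   if not ial.success:
  #     raise errors.OpExecError("Allocator failed: %s" % ial.info)
  #   target_nodes = ial.nodes
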
  def _ValidateResult(self):
    """Process the allocator results.

    This will process the results and, if successful, save them in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


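# Shape of the reply expected from the external allocator script, as enforced
# by IAllocator._ValidateResult above.  The concrete values are illustrative
# only; the script must at least provide the "success", "info" and "nodes"
# keys, with "nodes" being a list:
#
#   {
#     "success": True,
#     "info": "allocation successful",
#     "nodes": ["node1.example.com", "node2.example.com"],
#   }
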
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result