Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 8c96d01f

History | View | Annotate | Download (260.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
46
  """Logical Unit base class.
47

48
  Subclasses must follow these rules:
49
    - implement ExpandNames
50
    - implement CheckPrereq (except when tasklets are used)
51
    - implement Exec (except when tasklets are used)
52
    - implement BuildHooksEnv
53
    - redefine HPATH and HTYPE
54
    - optionally redefine their run requirements:
55
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
56

57
  Note that all commands require root permissions.
58

59
  @ivar dry_run_result: the value (if any) that will be returned to the caller
60
      in dry-run mode (signalled by opcode dry_run parameter)
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92
    self.LogStep = processor.LogStep
93
    # support for dry-run
94
    self.dry_run_result = None
95

    
96
    # Tasklets
97
    self.tasklets = []
98

    
99
    for attr_name in self._OP_REQP:
100
      attr_val = getattr(op, attr_name, None)
101
      if attr_val is None:
102
        raise errors.OpPrereqError("Required parameter '%s' missing" %
103
                                   attr_name)
104

    
105
    self.CheckArguments()
106

    
107
  def __GetSSH(self):
108
    """Returns the SshRunner object
109

110
    """
111
    if not self.__ssh:
112
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
113
    return self.__ssh
114

    
115
  ssh = property(fget=__GetSSH)
116

    
117
  def CheckArguments(self):
118
    """Check syntactic validity for the opcode arguments.
119

120
    This method is for doing a simple syntactic check and ensure
121
    validity of opcode parameters, without any cluster-related
122
    checks. While the same can be accomplished in ExpandNames and/or
123
    CheckPrereq, doing these separate is better because:
124

125
      - ExpandNames is left as as purely a lock-related function
126
      - CheckPrereq is run after we have acquired locks (and possible
127
        waited for them)
128

129
    The function is allowed to change the self.op attribute so that
130
    later methods can no longer worry about missing parameters.
131

132
    """
133
    pass
134

    
135
  def ExpandNames(self):
136
    """Expand names for this LU.
137

138
    This method is called before starting to execute the opcode, and it should
139
    update all the parameters of the opcode to their canonical form (e.g. a
140
    short node name must be fully expanded after this method has successfully
141
    completed). This way locking, hooks, logging, ecc. can work correctly.
142

143
    LUs which implement this method must also populate the self.needed_locks
144
    member, as a dict with lock levels as keys, and a list of needed lock names
145
    as values. Rules:
146

147
      - use an empty dict if you don't need any lock
148
      - if you don't need any lock at a particular level omit that level
149
      - don't put anything for the BGL level
150
      - if you want all locks at a level use locking.ALL_SET as a value
151

152
    If you need to share locks (rather than acquire them exclusively) at one
153
    level you can modify self.share_locks, setting a true value (usually 1) for
154
    that level. By default locks are not shared.
155

156
    This function can also define a list of tasklets, which then will be
157
    executed in order instead of the usual LU-level CheckPrereq and Exec
158
    functions, if those are not defined by the LU.
159

160
    Examples::
161

162
      # Acquire all nodes and one instance
163
      self.needed_locks = {
164
        locking.LEVEL_NODE: locking.ALL_SET,
165
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
166
      }
167
      # Acquire just two nodes
168
      self.needed_locks = {
169
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
170
      }
171
      # Acquire no locks
172
      self.needed_locks = {} # No, you can't leave it to the default value None
173

174
    """
175
    # The implementation of this method is mandatory only if the new LU is
176
    # concurrent, so that old LUs don't need to be changed all at the same
177
    # time.
178
    if self.REQ_BGL:
179
      self.needed_locks = {} # Exclusive LUs don't need locks.
180
    else:
181
      raise NotImplementedError
182

    
183
  def DeclareLocks(self, level):
184
    """Declare LU locking needs for a level
185

186
    While most LUs can just declare their locking needs at ExpandNames time,
187
    sometimes there's the need to calculate some locks after having acquired
188
    the ones before. This function is called just before acquiring locks at a
189
    particular level, but after acquiring the ones at lower levels, and permits
190
    such calculations. It can be used to modify self.needed_locks, and by
191
    default it does nothing.
192

193
    This function is only called if you have something already set in
194
    self.needed_locks for the level.
195

196
    @param level: Locking level which is going to be locked
197
    @type level: member of ganeti.locking.LEVELS
198

199
    """
200

    
201
  def CheckPrereq(self):
202
    """Check prerequisites for this LU.
203

204
    This method should check that the prerequisites for the execution
205
    of this LU are fulfilled. It can do internode communication, but
206
    it should be idempotent - no cluster or system changes are
207
    allowed.
208

209
    The method should raise errors.OpPrereqError in case something is
210
    not fulfilled. Its return value is ignored.
211

212
    This method should also update all the parameters of the opcode to
213
    their canonical form if it hasn't been done by ExpandNames before.
214

215
    """
216
    if self.tasklets:
217
      for tl in self.tasklets:
218
        tl.CheckPrereq()
219
    else:
220
      raise NotImplementedError
221

    
222
  def Exec(self, feedback_fn):
223
    """Execute the LU.
224

225
    This method should implement the actual work. It should raise
226
    errors.OpExecError for failures that are somewhat dealt with in
227
    code, or expected.
228

229
    """
230
    if self.tasklets:
231
      for tl in self.tasklets:
232
        tl.Exec(feedback_fn)
233
    else:
234
      raise NotImplementedError
235

    
236
  def BuildHooksEnv(self):
237
    """Build hooks environment for this LU.
238

239
    This method should return a three-node tuple consisting of: a dict
240
    containing the environment that will be used for running the
241
    specific hook for this LU, a list of node names on which the hook
242
    should run before the execution, and a list of node names on which
243
    the hook should run after the execution.
244

245
    The keys of the dict must not have 'GANETI_' prefixed as this will
246
    be handled in the hooks runner. Also note additional keys will be
247
    added by the hooks runner. If the LU doesn't define any
248
    environment, an empty dict (and not None) should be returned.
249

250
    No nodes should be returned as an empty list (and not None).
251

252
    Note that if the HPATH for a LU class is None, this function will
253
    not be called.
254

255
    """
256
    raise NotImplementedError
257

    
258
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
259
    """Notify the LU about the results of its hooks.
260

261
    This method is called every time a hooks phase is executed, and notifies
262
    the Logical Unit about the hooks' result. The LU can then use it to alter
263
    its result based on the hooks.  By default the method does nothing and the
264
    previous result is passed back unchanged but any LU can define it if it
265
    wants to use the local cluster hook-scripts somehow.
266

267
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
268
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
269
    @param hook_results: the results of the multi-node hooks rpc call
270
    @param feedback_fn: function used send feedback back to the caller
271
    @param lu_result: the previous Exec result this LU had, or None
272
        in the PRE phase
273
    @return: the new Exec result, based on the previous result
274
        and hook results
275

276
    """
277
    return lu_result
278

    
279
  def _ExpandAndLockInstance(self):
280
    """Helper function to expand and lock an instance.
281

282
    Many LUs that work on an instance take its name in self.op.instance_name
283
    and need to expand it and then declare the expanded name for locking. This
284
    function does it, and then updates self.op.instance_name to the expanded
285
    name. It also initializes needed_locks as a dict, if this hasn't been done
286
    before.
287

288
    """
289
    if self.needed_locks is None:
290
      self.needed_locks = {}
291
    else:
292
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
293
        "_ExpandAndLockInstance called with instance-level locks set"
294
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
295
    if expanded_name is None:
296
      raise errors.OpPrereqError("Instance '%s' not known" %
297
                                  self.op.instance_name)
298
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
299
    self.op.instance_name = expanded_name
300

    
301
  def _LockInstancesNodes(self, primary_only=False):
302
    """Helper function to declare instances' nodes for locking.
303

304
    This function should be called after locking one or more instances to lock
305
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
306
    with all primary or secondary nodes for instances already locked and
307
    present in self.needed_locks[locking.LEVEL_INSTANCE].
308

309
    It should be called from DeclareLocks, and for safety only works if
310
    self.recalculate_locks[locking.LEVEL_NODE] is set.
311

312
    In the future it may grow parameters to just lock some instance's nodes, or
313
    to just lock primaries or secondary nodes, if needed.
314

315
    If should be called in DeclareLocks in a way similar to::
316

317
      if level == locking.LEVEL_NODE:
318
        self._LockInstancesNodes()
319

320
    @type primary_only: boolean
321
    @param primary_only: only lock primary nodes of locked instances
322

323
    """
324
    assert locking.LEVEL_NODE in self.recalculate_locks, \
325
      "_LockInstancesNodes helper function called with no nodes to recalculate"
326

    
327
    # TODO: check if we're really been called with the instance locks held
328

    
329
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
330
    # future we might want to have different behaviors depending on the value
331
    # of self.recalculate_locks[locking.LEVEL_NODE]
332
    wanted_nodes = []
333
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
334
      instance = self.context.cfg.GetInstanceInfo(instance_name)
335
      wanted_nodes.append(instance.primary_node)
336
      if not primary_only:
337
        wanted_nodes.extend(instance.secondary_nodes)
338

    
339
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
340
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
341
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
342
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
343

    
344
    del self.recalculate_locks[locking.LEVEL_NODE]
345

    
346

    
347
class NoHooksLU(LogicalUnit):
348
  """Simple LU which runs no hooks.
349

350
  This LU is intended as a parent for other LogicalUnits which will
351
  run no hooks, in order to reduce duplicate code.
352

353
  """
354
  HPATH = None
355
  HTYPE = None
356

    
357

    
358
class Tasklet:
359
  """Tasklet base class.
360

361
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
362
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
363
  tasklets know nothing about locks.
364

365
  Subclasses must follow these rules:
366
    - Implement CheckPrereq
367
    - Implement Exec
368

369
  """
370
  def __init__(self, lu):
371
    self.lu = lu
372

    
373
    # Shortcuts
374
    self.cfg = lu.cfg
375
    self.rpc = lu.rpc
376

    
377
  def CheckPrereq(self):
378
    """Check prerequisites for this tasklets.
379

380
    This method should check whether the prerequisites for the execution of
381
    this tasklet are fulfilled. It can do internode communication, but it
382
    should be idempotent - no cluster or system changes are allowed.
383

384
    The method should raise errors.OpPrereqError in case something is not
385
    fulfilled. Its return value is ignored.
386

387
    This method should also update all parameters to their canonical form if it
388
    hasn't been done before.
389

390
    """
391
    raise NotImplementedError
392

    
393
  def Exec(self, feedback_fn):
394
    """Execute the tasklet.
395

396
    This method should implement the actual work. It should raise
397
    errors.OpExecError for failures that are somewhat dealt with in code, or
398
    expected.
399

400
    """
401
    raise NotImplementedError
402

    
403

    
404
def _GetWantedNodes(lu, nodes):
405
  """Returns list of checked and expanded node names.
406

407
  @type lu: L{LogicalUnit}
408
  @param lu: the logical unit on whose behalf we execute
409
  @type nodes: list
410
  @param nodes: list of node names or None for all nodes
411
  @rtype: list
412
  @return: the list of nodes, sorted
413
  @raise errors.OpProgrammerError: if the nodes parameter is wrong type
414

415
  """
416
  if not isinstance(nodes, list):
417
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
418

    
419
  if not nodes:
420
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
421
      " non-empty list of nodes whose name is to be expanded.")
422

    
423
  wanted = []
424
  for name in nodes:
425
    node = lu.cfg.ExpandNodeName(name)
426
    if node is None:
427
      raise errors.OpPrereqError("No such node name '%s'" % name)
428
    wanted.append(node)
429

    
430
  return utils.NiceSort(wanted)
431

    
432

    
433
def _GetWantedInstances(lu, instances):
434
  """Returns list of checked and expanded instance names.
435

436
  @type lu: L{LogicalUnit}
437
  @param lu: the logical unit on whose behalf we execute
438
  @type instances: list
439
  @param instances: list of instance names or None for all instances
440
  @rtype: list
441
  @return: the list of instances, sorted
442
  @raise errors.OpPrereqError: if the instances parameter is wrong type
443
  @raise errors.OpPrereqError: if any of the passed instances is not found
444

445
  """
446
  if not isinstance(instances, list):
447
    raise errors.OpPrereqError("Invalid argument type 'instances'")
448

    
449
  if instances:
450
    wanted = []
451

    
452
    for name in instances:
453
      instance = lu.cfg.ExpandInstanceName(name)
454
      if instance is None:
455
        raise errors.OpPrereqError("No such instance name '%s'" % name)
456
      wanted.append(instance)
457

    
458
  else:
459
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
460
  return wanted
461

    
462

    
463
def _CheckOutputFields(static, dynamic, selected):
464
  """Checks whether all selected fields are valid.
465

466
  @type static: L{utils.FieldSet}
467
  @param static: static fields set
468
  @type dynamic: L{utils.FieldSet}
469
  @param dynamic: dynamic fields set
470

471
  """
472
  f = utils.FieldSet()
473
  f.Extend(static)
474
  f.Extend(dynamic)
475

    
476
  delta = f.NonMatching(selected)
477
  if delta:
478
    raise errors.OpPrereqError("Unknown output fields selected: %s"
479
                               % ",".join(delta))
480

    
481

    
482
def _CheckBooleanOpField(op, name):
483
  """Validates boolean opcode parameters.
484

485
  This will ensure that an opcode parameter is either a boolean value,
486
  or None (but that it always exists).
487

488
  """
489
  val = getattr(op, name, None)
490
  if not (val is None or isinstance(val, bool)):
491
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
492
                               (name, str(val)))
493
  setattr(op, name, val)
494

    
495

    
496
def _CheckNodeOnline(lu, node):
497
  """Ensure that a given node is online.
498

499
  @param lu: the LU on behalf of which we make the check
500
  @param node: the node to check
501
  @raise errors.OpPrereqError: if the node is offline
502

503
  """
504
  if lu.cfg.GetNodeInfo(node).offline:
505
    raise errors.OpPrereqError("Can't use offline node %s" % node)
506

    
507

    
508
def _CheckNodeNotDrained(lu, node):
509
  """Ensure that a given node is not drained.
510

511
  @param lu: the LU on behalf of which we make the check
512
  @param node: the node to check
513
  @raise errors.OpPrereqError: if the node is drained
514

515
  """
516
  if lu.cfg.GetNodeInfo(node).drained:
517
    raise errors.OpPrereqError("Can't use drained node %s" % node)
518

    
519

    
520
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
521
                          memory, vcpus, nics, disk_template, disks,
522
                          bep, hvp, hypervisor_name):
523
  """Builds instance related env variables for hooks
524

525
  This builds the hook environment from individual variables.
526

527
  @type name: string
528
  @param name: the name of the instance
529
  @type primary_node: string
530
  @param primary_node: the name of the instance's primary node
531
  @type secondary_nodes: list
532
  @param secondary_nodes: list of secondary nodes as strings
533
  @type os_type: string
534
  @param os_type: the name of the instance's OS
535
  @type status: boolean
536
  @param status: the should_run status of the instance
537
  @type memory: string
538
  @param memory: the memory size of the instance
539
  @type vcpus: string
540
  @param vcpus: the count of VCPUs the instance has
541
  @type nics: list
542
  @param nics: list of tuples (ip, mac, mode, link) representing
543
      the NICs the instance has
544
  @type disk_template: string
545
  @param disk_template: the disk template of the instance
546
  @type disks: list
547
  @param disks: the list of (size, mode) pairs
548
  @type bep: dict
549
  @param bep: the backend parameters for the instance
550
  @type hvp: dict
551
  @param hvp: the hypervisor parameters for the instance
552
  @type hypervisor_name: string
553
  @param hypervisor_name: the hypervisor for the instance
554
  @rtype: dict
555
  @return: the hook environment for this instance
556

557
  """
558
  if status:
559
    str_status = "up"
560
  else:
561
    str_status = "down"
562
  env = {
563
    "OP_TARGET": name,
564
    "INSTANCE_NAME": name,
565
    "INSTANCE_PRIMARY": primary_node,
566
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
567
    "INSTANCE_OS_TYPE": os_type,
568
    "INSTANCE_STATUS": str_status,
569
    "INSTANCE_MEMORY": memory,
570
    "INSTANCE_VCPUS": vcpus,
571
    "INSTANCE_DISK_TEMPLATE": disk_template,
572
    "INSTANCE_HYPERVISOR": hypervisor_name,
573
  }
574

    
575
  if nics:
576
    nic_count = len(nics)
577
    for idx, (ip, mac, mode, link) in enumerate(nics):
578
      if ip is None:
579
        ip = ""
580
      env["INSTANCE_NIC%d_IP" % idx] = ip
581
      env["INSTANCE_NIC%d_MAC" % idx] = mac
582
      env["INSTANCE_NIC%d_MODE" % idx] = mode
583
      env["INSTANCE_NIC%d_LINK" % idx] = link
584
      if mode == constants.NIC_MODE_BRIDGED:
585
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
586
  else:
587
    nic_count = 0
588

    
589
  env["INSTANCE_NIC_COUNT"] = nic_count
590

    
591
  if disks:
592
    disk_count = len(disks)
593
    for idx, (size, mode) in enumerate(disks):
594
      env["INSTANCE_DISK%d_SIZE" % idx] = size
595
      env["INSTANCE_DISK%d_MODE" % idx] = mode
596
  else:
597
    disk_count = 0
598

    
599
  env["INSTANCE_DISK_COUNT"] = disk_count
600

    
601
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
602
    for key, value in source.items():
603
      env["INSTANCE_%s_%s" % (kind, key)] = value
604

    
605
  return env
606

    
607
def _NICListToTuple(lu, nics):
608
  """Build a list of nic information tuples.
609

610
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
611
  value in LUQueryInstanceData.
612

613
  @type lu:  L{LogicalUnit}
614
  @param lu: the logical unit on whose behalf we execute
615
  @type nics: list of L{objects.NIC}
616
  @param nics: list of nics to convert to hooks tuples
617

618
  """
619
  hooks_nics = []
620
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
621
  for nic in nics:
622
    ip = nic.ip
623
    mac = nic.mac
624
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
625
    mode = filled_params[constants.NIC_MODE]
626
    link = filled_params[constants.NIC_LINK]
627
    hooks_nics.append((ip, mac, mode, link))
628
  return hooks_nics
629

    
630
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
631
  """Builds instance related env variables for hooks from an object.
632

633
  @type lu: L{LogicalUnit}
634
  @param lu: the logical unit on whose behalf we execute
635
  @type instance: L{objects.Instance}
636
  @param instance: the instance for which we should build the
637
      environment
638
  @type override: dict
639
  @param override: dictionary with key/values that will override
640
      our values
641
  @rtype: dict
642
  @return: the hook environment dictionary
643

644
  """
645
  cluster = lu.cfg.GetClusterInfo()
646
  bep = cluster.FillBE(instance)
647
  hvp = cluster.FillHV(instance)
648
  args = {
649
    'name': instance.name,
650
    'primary_node': instance.primary_node,
651
    'secondary_nodes': instance.secondary_nodes,
652
    'os_type': instance.os,
653
    'status': instance.admin_up,
654
    'memory': bep[constants.BE_MEMORY],
655
    'vcpus': bep[constants.BE_VCPUS],
656
    'nics': _NICListToTuple(lu, instance.nics),
657
    'disk_template': instance.disk_template,
658
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
659
    'bep': bep,
660
    'hvp': hvp,
661
    'hypervisor_name': instance.hypervisor,
662
  }
663
  if override:
664
    args.update(override)
665
  return _BuildInstanceHookEnv(**args)
666

    
667

    
668
def _AdjustCandidatePool(lu):
669
  """Adjust the candidate pool after node operations.
670

671
  """
672
  mod_list = lu.cfg.MaintainCandidatePool()
673
  if mod_list:
674
    lu.LogInfo("Promoted nodes to master candidate role: %s",
675
               ", ".join(node.name for node in mod_list))
676
    for name in mod_list:
677
      lu.context.ReaddNode(name)
678
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
679
  if mc_now > mc_max:
680
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
681
               (mc_now, mc_max))
682

    
683

    
684
def _CheckNicsBridgesExist(lu, target_nics, target_node,
685
                               profile=constants.PP_DEFAULT):
686
  """Check that the brigdes needed by a list of nics exist.
687

688
  """
689
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
690
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
691
                for nic in target_nics]
692
  brlist = [params[constants.NIC_LINK] for params in paramslist
693
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
694
  if brlist:
695
    result = lu.rpc.call_bridges_exist(target_node, brlist)
696
    result.Raise("Error checking bridges on destination node '%s'" %
697
                 target_node, prereq=True)
698

    
699

    
700
def _CheckInstanceBridgesExist(lu, instance, node=None):
701
  """Check that the brigdes needed by an instance exist.
702

703
  """
704
  if node is None:
705
    node = instance.primary_node
706
  _CheckNicsBridgesExist(lu, instance.nics, node)
707

    
708

    
709
def _GetNodeSecondaryInstances(cfg, node_name):
710
  """Returns secondary instances on a node.
711

712
  """
713
  instances = []
714

    
715
  for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
716
    if node_name in inst.secondary_nodes:
717
      instances.append(inst)
718

    
719
  return instances
720

    
721

    
722
class LUDestroyCluster(NoHooksLU):
723
  """Logical unit for destroying the cluster.
724

725
  """
726
  _OP_REQP = []
727

    
728
  def CheckPrereq(self):
729
    """Check prerequisites.
730

731
    This checks whether the cluster is empty.
732

733
    Any errors are signaled by raising errors.OpPrereqError.
734

735
    """
736
    master = self.cfg.GetMasterNode()
737

    
738
    nodelist = self.cfg.GetNodeList()
739
    if len(nodelist) != 1 or nodelist[0] != master:
740
      raise errors.OpPrereqError("There are still %d node(s) in"
741
                                 " this cluster." % (len(nodelist) - 1))
742
    instancelist = self.cfg.GetInstanceList()
743
    if instancelist:
744
      raise errors.OpPrereqError("There are still %d instance(s) in"
745
                                 " this cluster." % len(instancelist))
746

    
747
  def Exec(self, feedback_fn):
748
    """Destroys the cluster.
749

750
    """
751
    master = self.cfg.GetMasterNode()
752
    result = self.rpc.call_node_stop_master(master, False)
753
    result.Raise("Could not disable the master role")
754
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
755
    utils.CreateBackup(priv_key)
756
    utils.CreateBackup(pub_key)
757
    return master
758

    
759

    
760
class LUVerifyCluster(LogicalUnit):
761
  """Verifies the cluster status.
762

763
  """
764
  HPATH = "cluster-verify"
765
  HTYPE = constants.HTYPE_CLUSTER
766
  _OP_REQP = ["skip_checks"]
767
  REQ_BGL = False
768

    
769
  def ExpandNames(self):
770
    self.needed_locks = {
771
      locking.LEVEL_NODE: locking.ALL_SET,
772
      locking.LEVEL_INSTANCE: locking.ALL_SET,
773
    }
774
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
775

    
776
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
777
                  node_result, feedback_fn, master_files,
778
                  drbd_map, vg_name):
779
    """Run multiple tests against a node.
780

781
    Test list:
782

783
      - compares ganeti version
784
      - checks vg existence and size > 20G
785
      - checks config file checksum
786
      - checks ssh to other nodes
787

788
    @type nodeinfo: L{objects.Node}
789
    @param nodeinfo: the node to check
790
    @param file_list: required list of files
791
    @param local_cksum: dictionary of local files and their checksums
792
    @param node_result: the results from the node
793
    @param feedback_fn: function used to accumulate results
794
    @param master_files: list of files that only masters should have
795
    @param drbd_map: the useddrbd minors for this node, in
796
        form of minor: (instance, must_exist) which correspond to instances
797
        and their running status
798
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
799

800
    """
801
    node = nodeinfo.name
802

    
803
    # main result, node_result should be a non-empty dict
804
    if not node_result or not isinstance(node_result, dict):
805
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
806
      return True
807

    
808
    # compares ganeti version
809
    local_version = constants.PROTOCOL_VERSION
810
    remote_version = node_result.get('version', None)
811
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
812
            len(remote_version) == 2):
813
      feedback_fn("  - ERROR: connection to %s failed" % (node))
814
      return True
815

    
816
    if local_version != remote_version[0]:
817
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
818
                  " node %s %s" % (local_version, node, remote_version[0]))
819
      return True
820

    
821
    # node seems compatible, we can actually try to look into its results
822

    
823
    bad = False
824

    
825
    # full package version
826
    if constants.RELEASE_VERSION != remote_version[1]:
827
      feedback_fn("  - WARNING: software version mismatch: master %s,"
828
                  " node %s %s" %
829
                  (constants.RELEASE_VERSION, node, remote_version[1]))
830

    
831
    # checks vg existence and size > 20G
832
    if vg_name is not None:
833
      vglist = node_result.get(constants.NV_VGLIST, None)
834
      if not vglist:
835
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
836
                        (node,))
837
        bad = True
838
      else:
839
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
840
                                              constants.MIN_VG_SIZE)
841
        if vgstatus:
842
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
843
          bad = True
844

    
845
    # checks config file checksum
846

    
847
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
848
    if not isinstance(remote_cksum, dict):
849
      bad = True
850
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
851
    else:
852
      for file_name in file_list:
853
        node_is_mc = nodeinfo.master_candidate
854
        must_have_file = file_name not in master_files
855
        if file_name not in remote_cksum:
856
          if node_is_mc or must_have_file:
857
            bad = True
858
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
859
        elif remote_cksum[file_name] != local_cksum[file_name]:
860
          if node_is_mc or must_have_file:
861
            bad = True
862
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
863
          else:
864
            # not candidate and this is not a must-have file
865
            bad = True
866
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
867
                        " candidates (and the file is outdated)" % file_name)
868
        else:
869
          # all good, except non-master/non-must have combination
870
          if not node_is_mc and not must_have_file:
871
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
872
                        " candidates" % file_name)
873

    
874
    # checks ssh to any
875

    
876
    if constants.NV_NODELIST not in node_result:
877
      bad = True
878
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
879
    else:
880
      if node_result[constants.NV_NODELIST]:
881
        bad = True
882
        for node in node_result[constants.NV_NODELIST]:
883
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
884
                          (node, node_result[constants.NV_NODELIST][node]))
885

    
886
    if constants.NV_NODENETTEST not in node_result:
887
      bad = True
888
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
889
    else:
890
      if node_result[constants.NV_NODENETTEST]:
891
        bad = True
892
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
893
        for node in nlist:
894
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
895
                          (node, node_result[constants.NV_NODENETTEST][node]))
896

    
897
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
898
    if isinstance(hyp_result, dict):
899
      for hv_name, hv_result in hyp_result.iteritems():
900
        if hv_result is not None:
901
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
902
                      (hv_name, hv_result))
903

    
904
    # check used drbd list
905
    if vg_name is not None:
906
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
907
      if not isinstance(used_minors, (tuple, list)):
908
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
909
                    str(used_minors))
910
      else:
911
        for minor, (iname, must_exist) in drbd_map.items():
912
          if minor not in used_minors and must_exist:
913
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
914
                        " not active" % (minor, iname))
915
            bad = True
916
        for minor in used_minors:
917
          if minor not in drbd_map:
918
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
919
                        minor)
920
            bad = True
921

    
922
    return bad
923

    
924
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
925
                      node_instance, feedback_fn, n_offline):
926
    """Verify an instance.
927

928
    This function checks to see if the required block devices are
929
    available on the instance's node.
930

931
    """
932
    bad = False
933

    
934
    node_current = instanceconfig.primary_node
935

    
936
    node_vol_should = {}
937
    instanceconfig.MapLVsByNode(node_vol_should)
938

    
939
    for node in node_vol_should:
940
      if node in n_offline:
941
        # ignore missing volumes on offline nodes
942
        continue
943
      for volume in node_vol_should[node]:
944
        if node not in node_vol_is or volume not in node_vol_is[node]:
945
          feedback_fn("  - ERROR: volume %s missing on node %s" %
946
                          (volume, node))
947
          bad = True
948

    
949
    if instanceconfig.admin_up:
950
      if ((node_current not in node_instance or
951
          not instance in node_instance[node_current]) and
952
          node_current not in n_offline):
953
        feedback_fn("  - ERROR: instance %s not running on node %s" %
954
                        (instance, node_current))
955
        bad = True
956

    
957
    for node in node_instance:
958
      if (not node == node_current):
959
        if instance in node_instance[node]:
960
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
961
                          (instance, node))
962
          bad = True
963

    
964
    return bad
965

    
966
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
967
    """Verify if there are any unknown volumes in the cluster.
968

969
    The .os, .swap and backup volumes are ignored. All other volumes are
970
    reported as unknown.
971

972
    """
973
    bad = False
974

    
975
    for node in node_vol_is:
976
      for volume in node_vol_is[node]:
977
        if node not in node_vol_should or volume not in node_vol_should[node]:
978
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
979
                      (volume, node))
980
          bad = True
981
    return bad
982

    
983
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
984
    """Verify the list of running instances.
985

986
    This checks what instances are running but unknown to the cluster.
987

988
    """
989
    bad = False
990
    for node in node_instance:
991
      for runninginstance in node_instance[node]:
992
        if runninginstance not in instancelist:
993
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
994
                          (runninginstance, node))
995
          bad = True
996
    return bad
997

    
998
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
999
    """Verify N+1 Memory Resilience.
1000

1001
    Check that if one single node dies we can still start all the instances it
1002
    was primary for.
1003

1004
    """
1005
    bad = False
1006

    
1007
    for node, nodeinfo in node_info.iteritems():
1008
      # This code checks that every node which is now listed as secondary has
1009
      # enough memory to host all instances it is supposed to should a single
1010
      # other node in the cluster fail.
1011
      # FIXME: not ready for failover to an arbitrary node
1012
      # FIXME: does not support file-backed instances
1013
      # WARNING: we currently take into account down instances as well as up
1014
      # ones, considering that even if they're down someone might want to start
1015
      # them even in the event of a node failure.
1016
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1017
        needed_mem = 0
1018
        for instance in instances:
1019
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1020
          if bep[constants.BE_AUTO_BALANCE]:
1021
            needed_mem += bep[constants.BE_MEMORY]
1022
        if nodeinfo['mfree'] < needed_mem:
1023
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
1024
                      " failovers should node %s fail" % (node, prinode))
1025
          bad = True
1026
    return bad
1027

    
1028
  def CheckPrereq(self):
1029
    """Check prerequisites.
1030

1031
    Transform the list of checks we're going to skip into a set and check that
1032
    all its members are valid.
1033

1034
    """
1035
    self.skip_set = frozenset(self.op.skip_checks)
1036
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1037
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1038

    
1039
  def BuildHooksEnv(self):
1040
    """Build hooks env.
1041

1042
    Cluster-Verify hooks just ran in the post phase and their failure makes
1043
    the output be logged in the verify output and the verification to fail.
1044

1045
    """
1046
    all_nodes = self.cfg.GetNodeList()
1047
    env = {
1048
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1049
      }
1050
    for node in self.cfg.GetAllNodesInfo().values():
1051
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1052

    
1053
    return env, [], all_nodes
1054

    
1055
  def Exec(self, feedback_fn):
1056
    """Verify integrity of cluster, performing various test on nodes.
1057

1058
    """
1059
    bad = False
1060
    feedback_fn("* Verifying global settings")
1061
    for msg in self.cfg.VerifyConfig():
1062
      feedback_fn("  - ERROR: %s" % msg)
1063

    
1064
    vg_name = self.cfg.GetVGName()
1065
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1066
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1067
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1068
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1069
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1070
                        for iname in instancelist)
1071
    i_non_redundant = [] # Non redundant instances
1072
    i_non_a_balanced = [] # Non auto-balanced instances
1073
    n_offline = [] # List of offline nodes
1074
    n_drained = [] # List of nodes being drained
1075
    node_volume = {}
1076
    node_instance = {}
1077
    node_info = {}
1078
    instance_cfg = {}
1079

    
1080
    # FIXME: verify OS list
1081
    # do local checksums
1082
    master_files = [constants.CLUSTER_CONF_FILE]
1083

    
1084
    file_names = ssconf.SimpleStore().GetFileList()
1085
    file_names.append(constants.SSL_CERT_FILE)
1086
    file_names.append(constants.RAPI_CERT_FILE)
1087
    file_names.extend(master_files)
1088

    
1089
    local_checksums = utils.FingerprintFiles(file_names)
1090

    
1091
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1092
    node_verify_param = {
1093
      constants.NV_FILELIST: file_names,
1094
      constants.NV_NODELIST: [node.name for node in nodeinfo
1095
                              if not node.offline],
1096
      constants.NV_HYPERVISOR: hypervisors,
1097
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1098
                                  node.secondary_ip) for node in nodeinfo
1099
                                 if not node.offline],
1100
      constants.NV_INSTANCELIST: hypervisors,
1101
      constants.NV_VERSION: None,
1102
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1103
      }
1104
    if vg_name is not None:
1105
      node_verify_param[constants.NV_VGLIST] = None
1106
      node_verify_param[constants.NV_LVLIST] = vg_name
1107
      node_verify_param[constants.NV_DRBDLIST] = None
1108
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1109
                                           self.cfg.GetClusterName())
1110

    
1111
    cluster = self.cfg.GetClusterInfo()
1112
    master_node = self.cfg.GetMasterNode()
1113
    all_drbd_map = self.cfg.ComputeDRBDMap()
1114

    
1115
    for node_i in nodeinfo:
1116
      node = node_i.name
1117

    
1118
      if node_i.offline:
1119
        feedback_fn("* Skipping offline node %s" % (node,))
1120
        n_offline.append(node)
1121
        continue
1122

    
1123
      if node == master_node:
1124
        ntype = "master"
1125
      elif node_i.master_candidate:
1126
        ntype = "master candidate"
1127
      elif node_i.drained:
1128
        ntype = "drained"
1129
        n_drained.append(node)
1130
      else:
1131
        ntype = "regular"
1132
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1133

    
1134
      msg = all_nvinfo[node].fail_msg
1135
      if msg:
1136
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1137
        bad = True
1138
        continue
1139

    
1140
      nresult = all_nvinfo[node].payload
1141
      node_drbd = {}
1142
      for minor, instance in all_drbd_map[node].items():
1143
        if instance not in instanceinfo:
1144
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1145
                      instance)
1146
          # ghost instance should not be running, but otherwise we
1147
          # don't give double warnings (both ghost instance and
1148
          # unallocated minor in use)
1149
          node_drbd[minor] = (instance, False)
1150
        else:
1151
          instance = instanceinfo[instance]
1152
          node_drbd[minor] = (instance.name, instance.admin_up)
1153
      result = self._VerifyNode(node_i, file_names, local_checksums,
1154
                                nresult, feedback_fn, master_files,
1155
                                node_drbd, vg_name)
1156
      bad = bad or result
1157

    
1158
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1159
      if vg_name is None:
1160
        node_volume[node] = {}
1161
      elif isinstance(lvdata, basestring):
1162
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1163
                    (node, utils.SafeEncode(lvdata)))
1164
        bad = True
1165
        node_volume[node] = {}
1166
      elif not isinstance(lvdata, dict):
1167
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1168
        bad = True
1169
        continue
1170
      else:
1171
        node_volume[node] = lvdata
1172

    
1173
      # node_instance
1174
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1175
      if not isinstance(idata, list):
1176
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1177
                    (node,))
1178
        bad = True
1179
        continue
1180

    
1181
      node_instance[node] = idata
1182

    
1183
      # node_info
1184
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1185
      if not isinstance(nodeinfo, dict):
1186
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1187
        bad = True
1188
        continue
1189

    
1190
      try:
1191
        node_info[node] = {
1192
          "mfree": int(nodeinfo['memory_free']),
1193
          "pinst": [],
1194
          "sinst": [],
1195
          # dictionary holding all instances this node is secondary for,
1196
          # grouped by their primary node. Each key is a cluster node, and each
1197
          # value is a list of instances which have the key as primary and the
1198
          # current node as secondary.  this is handy to calculate N+1 memory
1199
          # availability if you can only failover from a primary to its
1200
          # secondary.
1201
          "sinst-by-pnode": {},
1202
        }
1203
        # FIXME: devise a free space model for file based instances as well
1204
        if vg_name is not None:
1205
          if (constants.NV_VGLIST not in nresult or
1206
              vg_name not in nresult[constants.NV_VGLIST]):
1207
            feedback_fn("  - ERROR: node %s didn't return data for the"
1208
                        " volume group '%s' - it is either missing or broken" %
1209
                        (node, vg_name))
1210
            bad = True
1211
            continue
1212
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1213
      except (ValueError, KeyError):
1214
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1215
                    " from node %s" % (node,))
1216
        bad = True
1217
        continue
1218

    
1219
    node_vol_should = {}
1220

    
1221
    for instance in instancelist:
1222
      feedback_fn("* Verifying instance %s" % instance)
1223
      inst_config = instanceinfo[instance]
1224
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1225
                                     node_instance, feedback_fn, n_offline)
1226
      bad = bad or result
1227
      inst_nodes_offline = []
1228

    
1229
      inst_config.MapLVsByNode(node_vol_should)
1230

    
1231
      instance_cfg[instance] = inst_config
1232

    
1233
      pnode = inst_config.primary_node
1234
      if pnode in node_info:
1235
        node_info[pnode]['pinst'].append(instance)
1236
      elif pnode not in n_offline:
1237
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1238
                    " %s failed" % (instance, pnode))
1239
        bad = True
1240

    
1241
      if pnode in n_offline:
1242
        inst_nodes_offline.append(pnode)
1243

    
1244
      # If the instance is non-redundant we cannot survive losing its primary
1245
      # node, so we are not N+1 compliant. On the other hand we have no disk
1246
      # templates with more than one secondary so that situation is not well
1247
      # supported either.
1248
      # FIXME: does not support file-backed instances
1249
      if len(inst_config.secondary_nodes) == 0:
1250
        i_non_redundant.append(instance)
1251
      elif len(inst_config.secondary_nodes) > 1:
1252
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1253
                    % instance)
1254

    
1255
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1256
        i_non_a_balanced.append(instance)
1257

    
1258
      for snode in inst_config.secondary_nodes:
1259
        if snode in node_info:
1260
          node_info[snode]['sinst'].append(instance)
1261
          if pnode not in node_info[snode]['sinst-by-pnode']:
1262
            node_info[snode]['sinst-by-pnode'][pnode] = []
1263
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1264
        elif snode not in n_offline:
1265
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1266
                      " %s failed" % (instance, snode))
1267
          bad = True
1268
        if snode in n_offline:
1269
          inst_nodes_offline.append(snode)
1270

    
1271
      if inst_nodes_offline:
1272
        # warn that the instance lives on offline nodes, and set bad=True
1273
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1274
                    ", ".join(inst_nodes_offline))
1275
        bad = True
1276

    
1277
    feedback_fn("* Verifying orphan volumes")
1278
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1279
                                       feedback_fn)
1280
    bad = bad or result
1281

    
1282
    feedback_fn("* Verifying remaining instances")
1283
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1284
                                         feedback_fn)
1285
    bad = bad or result
1286

    
1287
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1288
      feedback_fn("* Verifying N+1 Memory redundancy")
1289
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1290
      bad = bad or result
1291

    
1292
    feedback_fn("* Other Notes")
1293
    if i_non_redundant:
1294
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1295
                  % len(i_non_redundant))
1296

    
1297
    if i_non_a_balanced:
1298
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1299
                  % len(i_non_a_balanced))
1300

    
1301
    if n_offline:
1302
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1303

    
1304
    if n_drained:
1305
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1306

    
1307
    return not bad
1308

    
1309
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1310
    """Analyze the post-hooks' result
1311

1312
    This method analyses the hook result, handles it, and sends some
1313
    nicely-formatted feedback back to the user.
1314

1315
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1316
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1317
    @param hooks_results: the results of the multi-node hooks rpc call
1318
    @param feedback_fn: function used send feedback back to the caller
1319
    @param lu_result: previous Exec result
1320
    @return: the new Exec result, based on the previous result
1321
        and hook results
1322

1323
    """
1324
    # We only really run POST phase hooks, and are only interested in
1325
    # their results
1326
    if phase == constants.HOOKS_PHASE_POST:
1327
      # Used to change hooks' output to proper indentation
1328
      indent_re = re.compile('^', re.M)
1329
      feedback_fn("* Hooks Results")
1330
      if not hooks_results:
1331
        feedback_fn("  - ERROR: general communication failure")
1332
        lu_result = 1
1333
      else:
1334
        for node_name in hooks_results:
1335
          show_node_header = True
1336
          res = hooks_results[node_name]
1337
          msg = res.fail_msg
1338
          if msg:
1339
            if res.offline:
1340
              # no need to warn or set fail return value
1341
              continue
1342
            feedback_fn("    Communication failure in hooks execution: %s" %
1343
                        msg)
1344
            lu_result = 1
1345
            continue
1346
          for script, hkr, output in res.payload:
1347
            if hkr == constants.HKR_FAIL:
1348
              # The node header is only shown once, if there are
1349
              # failing hooks on that node
1350
              if show_node_header:
1351
                feedback_fn("  Node %s:" % node_name)
1352
                show_node_header = False
1353
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1354
              output = indent_re.sub('      ', output)
1355
              feedback_fn("%s" % output)
1356
              lu_result = 1
1357

    
1358
      return lu_result
1359

    
1360

    
1361
class LUVerifyDisks(NoHooksLU):
1362
  """Verifies the cluster disks status.
1363

1364
  """
1365
  _OP_REQP = []
1366
  REQ_BGL = False
1367

    
1368
  def ExpandNames(self):
1369
    self.needed_locks = {
1370
      locking.LEVEL_NODE: locking.ALL_SET,
1371
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1372
    }
1373
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1374

    
1375
  def CheckPrereq(self):
1376
    """Check prerequisites.
1377

1378
    This has no prerequisites.
1379

1380
    """
1381
    pass
1382

    
1383
  def Exec(self, feedback_fn):
1384
    """Verify integrity of cluster disks.
1385

1386
    @rtype: tuple of three items
1387
    @return: a tuple of (dict of node-to-node_error, list of instances
1388
        which need activate-disks, dict of instance: (node, volume) for
1389
        missing volumes
1390

1391
    """
1392
    result = res_nodes, res_instances, res_missing = {}, [], {}
1393

    
1394
    vg_name = self.cfg.GetVGName()
1395
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1396
    instances = [self.cfg.GetInstanceInfo(name)
1397
                 for name in self.cfg.GetInstanceList()]
1398

    
1399
    nv_dict = {}
1400
    for inst in instances:
1401
      inst_lvs = {}
1402
      if (not inst.admin_up or
1403
          inst.disk_template not in constants.DTS_NET_MIRROR):
1404
        continue
1405
      inst.MapLVsByNode(inst_lvs)
1406
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1407
      for node, vol_list in inst_lvs.iteritems():
1408
        for vol in vol_list:
1409
          nv_dict[(node, vol)] = inst
1410

    
1411
    if not nv_dict:
1412
      return result
1413

    
1414
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1415

    
1416
    for node in nodes:
1417
      # node_volume
1418
      node_res = node_lvs[node]
1419
      if node_res.offline:
1420
        continue
1421
      msg = node_res.fail_msg
1422
      if msg:
1423
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1424
        res_nodes[node] = msg
1425
        continue
1426

    
1427
      lvs = node_res.payload
1428
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1429
        inst = nv_dict.pop((node, lv_name), None)
1430
        if (not lv_online and inst is not None
1431
            and inst.name not in res_instances):
1432
          res_instances.append(inst.name)
1433

    
1434
    # any leftover items in nv_dict are missing LVs, let's arrange the
1435
    # data better
1436
    for key, inst in nv_dict.iteritems():
1437
      if inst.name not in res_missing:
1438
        res_missing[inst.name] = []
1439
      res_missing[inst.name].append(key)
1440

    
1441
    return result
1442

    
1443

    
1444
class LURenameCluster(LogicalUnit):
1445
  """Rename the cluster.
1446

1447
  """
1448
  HPATH = "cluster-rename"
1449
  HTYPE = constants.HTYPE_CLUSTER
1450
  _OP_REQP = ["name"]
1451

    
1452
  def BuildHooksEnv(self):
1453
    """Build hooks env.
1454

1455
    """
1456
    env = {
1457
      "OP_TARGET": self.cfg.GetClusterName(),
1458
      "NEW_NAME": self.op.name,
1459
      }
1460
    mn = self.cfg.GetMasterNode()
1461
    return env, [mn], [mn]
1462

    
1463
  def CheckPrereq(self):
1464
    """Verify that the passed name is a valid one.
1465

1466
    """
1467
    hostname = utils.HostInfo(self.op.name)
1468

    
1469
    new_name = hostname.name
1470
    self.ip = new_ip = hostname.ip
1471
    old_name = self.cfg.GetClusterName()
1472
    old_ip = self.cfg.GetMasterIP()
1473
    if new_name == old_name and new_ip == old_ip:
1474
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1475
                                 " cluster has changed")
1476
    if new_ip != old_ip:
1477
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1478
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1479
                                   " reachable on the network. Aborting." %
1480
                                   new_ip)
1481

    
1482
    self.op.name = new_name
1483

    
1484
  def Exec(self, feedback_fn):
1485
    """Rename the cluster.
1486

1487
    """
1488
    clustername = self.op.name
1489
    ip = self.ip
1490

    
1491
    # shutdown the master IP
1492
    master = self.cfg.GetMasterNode()
1493
    result = self.rpc.call_node_stop_master(master, False)
1494
    result.Raise("Could not disable the master role")
1495

    
1496
    try:
1497
      cluster = self.cfg.GetClusterInfo()
1498
      cluster.cluster_name = clustername
1499
      cluster.master_ip = ip
1500
      self.cfg.Update(cluster)
1501

    
1502
      # update the known hosts file
1503
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1504
      node_list = self.cfg.GetNodeList()
1505
      try:
1506
        node_list.remove(master)
1507
      except ValueError:
1508
        pass
1509
      result = self.rpc.call_upload_file(node_list,
1510
                                         constants.SSH_KNOWN_HOSTS_FILE)
1511
      for to_node, to_result in result.iteritems():
1512
        msg = to_result.fail_msg
1513
        if msg:
1514
          msg = ("Copy of file %s to node %s failed: %s" %
1515
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1516
          self.proc.LogWarning(msg)
1517

    
1518
    finally:
1519
      result = self.rpc.call_node_start_master(master, False, False)
1520
      msg = result.fail_msg
1521
      if msg:
1522
        self.LogWarning("Could not re-enable the master role on"
1523
                        " the master, please restart manually: %s", msg)
1524

    
1525

    
1526
def _RecursiveCheckIfLVMBased(disk):
1527
  """Check if the given disk or its children are lvm-based.
1528

1529
  @type disk: L{objects.Disk}
1530
  @param disk: the disk to check
1531
  @rtype: boolean
1532
  @return: boolean indicating whether a LD_LV dev_type was found or not
1533

1534
  """
1535
  if disk.children:
1536
    for chdisk in disk.children:
1537
      if _RecursiveCheckIfLVMBased(chdisk):
1538
        return True
1539
  return disk.dev_type == constants.LD_LV
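# For example, a DRBD-based disk whose children are two logical volumes is
# reported as lvm-based here, because the recursion reaches an LD_LV leaf
# (illustrative description, not an exhaustive list of disk layouts).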
1540

    
1541

    
1542
class LUSetClusterParams(LogicalUnit):
1543
  """Change the parameters of the cluster.
1544

1545
  """
1546
  HPATH = "cluster-modify"
1547
  HTYPE = constants.HTYPE_CLUSTER
1548
  _OP_REQP = []
1549
  REQ_BGL = False
1550

    
1551
  def CheckArguments(self):
1552
    """Check parameters
1553

1554
    """
1555
    if not hasattr(self.op, "candidate_pool_size"):
1556
      self.op.candidate_pool_size = None
1557
    if self.op.candidate_pool_size is not None:
1558
      try:
1559
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1560
      except (ValueError, TypeError), err:
1561
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1562
                                   str(err))
1563
      if self.op.candidate_pool_size < 1:
1564
        raise errors.OpPrereqError("At least one master candidate needed")
1565

    
1566
  def ExpandNames(self):
1567
    # FIXME: in the future maybe other cluster params won't require checking on
1568
    # all nodes to be modified.
1569
    self.needed_locks = {
1570
      locking.LEVEL_NODE: locking.ALL_SET,
1571
    }
1572
    self.share_locks[locking.LEVEL_NODE] = 1
1573

    
1574
  def BuildHooksEnv(self):
1575
    """Build hooks env.
1576

1577
    """
1578
    env = {
1579
      "OP_TARGET": self.cfg.GetClusterName(),
1580
      "NEW_VG_NAME": self.op.vg_name,
1581
      }
1582
    mn = self.cfg.GetMasterNode()
1583
    return env, [mn], [mn]
1584

    
1585
  def CheckPrereq(self):
1586
    """Check prerequisites.
1587

1588
    This checks whether the given params don't conflict and
1589
    if the given volume group is valid.
1590

1591
    """
1592
    if self.op.vg_name is not None and not self.op.vg_name:
1593
      instances = self.cfg.GetAllInstancesInfo().values()
1594
      for inst in instances:
1595
        for disk in inst.disks:
1596
          if _RecursiveCheckIfLVMBased(disk):
1597
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1598
                                       " lvm-based instances exist")
1599

    
1600
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1601

    
1602
    # if vg_name not None, checks given volume group on all nodes
1603
    if self.op.vg_name:
1604
      vglist = self.rpc.call_vg_list(node_list)
1605
      for node in node_list:
1606
        msg = vglist[node].fail_msg
1607
        if msg:
1608
          # ignoring down node
1609
          self.LogWarning("Error while gathering data on node %s"
1610
                          " (ignoring node): %s", node, msg)
1611
          continue
1612
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1613
                                              self.op.vg_name,
1614
                                              constants.MIN_VG_SIZE)
1615
        if vgstatus:
1616
          raise errors.OpPrereqError("Error on node '%s': %s" %
1617
                                     (node, vgstatus))
1618

    
1619
    self.cluster = cluster = self.cfg.GetClusterInfo()
1620
    # validate params changes
1621
    if self.op.beparams:
1622
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1623
      self.new_beparams = objects.FillDict(
1624
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1625

    
1626
    if self.op.nicparams:
1627
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1628
      self.new_nicparams = objects.FillDict(
1629
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1630
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1631

    
1632
    # hypervisor list/parameters
1633
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1634
    if self.op.hvparams:
1635
      if not isinstance(self.op.hvparams, dict):
1636
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1637
      for hv_name, hv_dict in self.op.hvparams.items():
1638
        if hv_name not in self.new_hvparams:
1639
          self.new_hvparams[hv_name] = hv_dict
1640
        else:
1641
          self.new_hvparams[hv_name].update(hv_dict)
1642

    
1643
    if self.op.enabled_hypervisors is not None:
1644
      self.hv_list = self.op.enabled_hypervisors
1645
      if not self.hv_list:
1646
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1647
                                   " least one member")
1648
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1649
      if invalid_hvs:
1650
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1651
                                   " entries: %s" % invalid_hvs)
1652
    else:
1653
      self.hv_list = cluster.enabled_hypervisors
1654

    
1655
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1656
      # either the enabled list has changed, or the parameters have, validate
1657
      for hv_name, hv_params in self.new_hvparams.items():
1658
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1659
            (self.op.enabled_hypervisors and
1660
             hv_name in self.op.enabled_hypervisors)):
1661
          # either this is a new hypervisor, or its parameters have changed
1662
          hv_class = hypervisor.GetHypervisor(hv_name)
1663
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1664
          hv_class.CheckParameterSyntax(hv_params)
1665
          _CheckHVParams(self, node_list, hv_name, hv_params)
1666

    
1667
  def Exec(self, feedback_fn):
1668
    """Change the parameters of the cluster.
1669

1670
    """
1671
    if self.op.vg_name is not None:
1672
      new_volume = self.op.vg_name
1673
      if not new_volume:
1674
        new_volume = None
1675
      if new_volume != self.cfg.GetVGName():
1676
        self.cfg.SetVGName(new_volume)
1677
      else:
1678
        feedback_fn("Cluster LVM configuration already in desired"
1679
                    " state, not changing")
1680
    if self.op.hvparams:
1681
      self.cluster.hvparams = self.new_hvparams
1682
    if self.op.enabled_hypervisors is not None:
1683
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1684
    if self.op.beparams:
1685
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1686
    if self.op.nicparams:
1687
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1688

    
1689
    if self.op.candidate_pool_size is not None:
1690
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1691
      # we need to update the pool size here, otherwise the save will fail
1692
      _AdjustCandidatePool(self)
1693

    
1694
    self.cfg.Update(self.cluster)
1695

    
1696

    
1697
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1698
  """Distribute additional files which are part of the cluster configuration.
1699

1700
  ConfigWriter takes care of distributing the config and ssconf files, but
1701
  there are more files which should be distributed to all nodes. This function
1702
  makes sure those are copied.
1703

1704
  @param lu: calling logical unit
1705
  @param additional_nodes: list of nodes not in the config to distribute to
1706

1707
  """
1708
  # 1. Gather target nodes
1709
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1710
  dist_nodes = lu.cfg.GetNodeList()
1711
  if additional_nodes is not None:
1712
    dist_nodes.extend(additional_nodes)
1713
  if myself.name in dist_nodes:
1714
    dist_nodes.remove(myself.name)
1715
  # 2. Gather files to distribute
1716
  dist_files = set([constants.ETC_HOSTS,
1717
                    constants.SSH_KNOWN_HOSTS_FILE,
1718
                    constants.RAPI_CERT_FILE,
1719
                    constants.RAPI_USERS_FILE,
1720
                    constants.HMAC_CLUSTER_KEY,
1721
                   ])
1722

    
1723
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1724
  for hv_name in enabled_hypervisors:
1725
    hv_class = hypervisor.GetHypervisor(hv_name)
1726
    dist_files.update(hv_class.GetAncillaryFiles())
1727

    
1728
  # 3. Perform the files upload
1729
  for fname in dist_files:
1730
    if os.path.exists(fname):
1731
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1732
      for to_node, to_result in result.items():
1733
        msg = to_result.fail_msg
1734
        if msg:
1735
          msg = ("Copy of file %s to node %s failed: %s" %
1736
                 (fname, to_node, msg))
1737
          lu.proc.LogWarning(msg)
1738

    
1739

    
1740
class LURedistributeConfig(NoHooksLU):
1741
  """Force the redistribution of cluster configuration.
1742

1743
  This is a very simple LU.
1744

1745
  """
1746
  _OP_REQP = []
1747
  REQ_BGL = False
1748

    
1749
  def ExpandNames(self):
1750
    self.needed_locks = {
1751
      locking.LEVEL_NODE: locking.ALL_SET,
1752
    }
1753
    self.share_locks[locking.LEVEL_NODE] = 1
1754

    
1755
  def CheckPrereq(self):
1756
    """Check prerequisites.
1757

1758
    """
1759

    
1760
  def Exec(self, feedback_fn):
1761
    """Redistribute the configuration.
1762

1763
    """
1764
    self.cfg.Update(self.cfg.GetClusterInfo())
1765
    _RedistributeAncillaryFiles(self)
1766

    
1767

    
1768
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1769
  """Sleep and poll for an instance's disk to sync.
1770

1771
  """
1772
  if not instance.disks:
1773
    return True
1774

    
1775
  if not oneshot:
1776
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1777

    
1778
  node = instance.primary_node
1779

    
1780
  for dev in instance.disks:
1781
    lu.cfg.SetDiskID(dev, node)
1782

    
1783
  retries = 0
1784
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1785
  while True:
1786
    max_time = 0
1787
    done = True
1788
    cumul_degraded = False
1789
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1790
    msg = rstats.fail_msg
1791
    if msg:
1792
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1793
      retries += 1
1794
      if retries >= 10:
1795
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1796
                                 " aborting." % node)
1797
      time.sleep(6)
1798
      continue
1799
    rstats = rstats.payload
1800
    retries = 0
1801
    for i, mstat in enumerate(rstats):
1802
      if mstat is None:
1803
        lu.LogWarning("Can't compute data for node %s/%s",
1804
                           node, instance.disks[i].iv_name)
1805
        continue
1806
      # we ignore the ldisk parameter
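      # mstat is assumed to be a (perc_done, est_time, is_degraded, ldisk)
      # tuple, matching the unpacking below; perc_done is None once the
      # device has finished syncing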
1807
      perc_done, est_time, is_degraded, _ = mstat
1808
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1809
      if perc_done is not None:
1810
        done = False
1811
        if est_time is not None:
1812
          rem_time = "%d estimated seconds remaining" % est_time
1813
          max_time = est_time
1814
        else:
1815
          rem_time = "no time estimate"
1816
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1817
                        (instance.disks[i].iv_name, perc_done, rem_time))
1818

    
1819
    # if we're done but degraded, let's do a few small retries, to
1820
    # make sure we see a stable and not transient situation; therefore
1821
    # we force restart of the loop
1822
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1823
      logging.info("Degraded disks found, %d retries left", degr_retries)
1824
      degr_retries -= 1
1825
      time.sleep(1)
1826
      continue
1827

    
1828
    if done or oneshot:
1829
      break
1830

    
1831
    time.sleep(min(60, max_time))
1832

    
1833
  if done:
1834
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1835
  return not cumul_degraded
1836

    
1837

    
1838
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1839
  """Check that mirrors are not degraded.
1840

1841
  The ldisk parameter, if True, will change the test from the
1842
  is_degraded attribute (which represents overall non-ok status for
1843
  the device(s)) to the ldisk (representing the local storage status).
1844

1845
  """
1846
  lu.cfg.SetDiskID(dev, node)
1847
  if ldisk:
1848
    idx = 6
1849
  else:
1850
    idx = 5
1851

    
1852
  result = True
1853
  if on_primary or dev.AssembleOnSecondary():
1854
    rstats = lu.rpc.call_blockdev_find(node, dev)
1855
    msg = rstats.fail_msg
1856
    if msg:
1857
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1858
      result = False
1859
    elif not rstats.payload:
1860
      lu.LogWarning("Can't find disk on node %s", node)
1861
      result = False
1862
    else:
1863
      result = result and (not rstats.payload[idx])
1864
  if dev.children:
1865
    for child in dev.children:
1866
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1867

    
1868
  return result
1869

    
1870

    
1871
class LUDiagnoseOS(NoHooksLU):
1872
  """Logical unit for OS diagnose/query.
1873

1874
  """
1875
  _OP_REQP = ["output_fields", "names"]
1876
  REQ_BGL = False
1877
  _FIELDS_STATIC = utils.FieldSet()
1878
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1879

    
1880
  def ExpandNames(self):
1881
    if self.op.names:
1882
      raise errors.OpPrereqError("Selective OS query not supported")
1883

    
1884
    _CheckOutputFields(static=self._FIELDS_STATIC,
1885
                       dynamic=self._FIELDS_DYNAMIC,
1886
                       selected=self.op.output_fields)
1887

    
1888
    # Lock all nodes, in shared mode
1889
    # Temporary removal of locks, should be reverted later
1890
    # TODO: reintroduce locks when they are lighter-weight
1891
    self.needed_locks = {}
1892
    #self.share_locks[locking.LEVEL_NODE] = 1
1893
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1894

    
1895
  def CheckPrereq(self):
1896
    """Check prerequisites.
1897

1898
    """
1899

    
1900
  @staticmethod
1901
  def _DiagnoseByOS(node_list, rlist):
1902
    """Remaps a per-node return list into an a per-os per-node dictionary
1903

1904
    @param node_list: a list with the names of all nodes
1905
    @param rlist: a map with node names as keys and OS objects as values
1906

1907
    @rtype: dict
1908
    @return: a dictionary with OS names as keys and, as values, another map with
1909
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
1910

1911
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
1912
                                     (/srv/..., False, "invalid api")],
1913
                           "node2": [(/srv/..., True, "")]}
1914
          }
1915

1916
    """
1917
    all_os = {}
1918
    # we build here the list of nodes that didn't fail the RPC (at RPC
1919
    # level), so that nodes with a non-responding node daemon don't
1920
    # make all OSes invalid
1921
    good_nodes = [node_name for node_name in rlist
1922
                  if not rlist[node_name].fail_msg]
1923
    for node_name, nr in rlist.items():
1924
      if nr.fail_msg or not nr.payload:
1925
        continue
1926
      for name, path, status, diagnose in nr.payload:
1927
        if name not in all_os:
1928
          # build a list of nodes for this os containing empty lists
1929
          # for each node in node_list
1930
          all_os[name] = {}
1931
          for nname in good_nodes:
1932
            all_os[name][nname] = []
1933
        all_os[name][node_name].append((path, status, diagnose))
1934
    return all_os
1935

    
1936
  def Exec(self, feedback_fn):
1937
    """Compute the list of OSes.
1938

1939
    """
1940
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1941
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1942
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1943
    output = []
1944
    for os_name, os_data in pol.items():
1945
      row = []
1946
      for field in self.op.output_fields:
1947
        if field == "name":
1948
          val = os_name
1949
        elif field == "valid":
1950
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
1951
        elif field == "node_status":
1952
          # this is just a copy of the dict
1953
          val = {}
1954
          for node_name, nos_list in os_data.items():
1955
            val[node_name] = nos_list
1956
        else:
1957
          raise errors.ParameterError(field)
1958
        row.append(val)
1959
      output.append(row)
1960

    
1961
    return output
1962

    
1963

    
1964
class LURemoveNode(LogicalUnit):
1965
  """Logical unit for removing a node.
1966

1967
  """
1968
  HPATH = "node-remove"
1969
  HTYPE = constants.HTYPE_NODE
1970
  _OP_REQP = ["node_name"]
1971

    
1972
  def BuildHooksEnv(self):
1973
    """Build hooks env.
1974

1975
    This doesn't run on the target node in the pre phase as a failed
1976
    node would then be impossible to remove.
1977

1978
    """
1979
    env = {
1980
      "OP_TARGET": self.op.node_name,
1981
      "NODE_NAME": self.op.node_name,
1982
      }
1983
    all_nodes = self.cfg.GetNodeList()
1984
    all_nodes.remove(self.op.node_name)
1985
    return env, all_nodes, all_nodes
1986

    
1987
  def CheckPrereq(self):
1988
    """Check prerequisites.
1989

1990
    This checks:
1991
     - the node exists in the configuration
1992
     - it does not have primary or secondary instances
1993
     - it's not the master
1994

1995
    Any errors are signaled by raising errors.OpPrereqError.
1996

1997
    """
1998
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1999
    if node is None:
2000
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2001

    
2002
    instance_list = self.cfg.GetInstanceList()
2003

    
2004
    masternode = self.cfg.GetMasterNode()
2005
    if node.name == masternode:
2006
      raise errors.OpPrereqError("Node is the master node,"
2007
                                 " you need to failover first.")
2008

    
2009
    for instance_name in instance_list:
2010
      instance = self.cfg.GetInstanceInfo(instance_name)
2011
      if node.name in instance.all_nodes:
2012
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2013
                                   " please remove first." % instance_name)
2014
    self.op.node_name = node.name
2015
    self.node = node
2016

    
2017
  def Exec(self, feedback_fn):
2018
    """Removes the node from the cluster.
2019

2020
    """
2021
    node = self.node
2022
    logging.info("Stopping the node daemon and removing configs from node %s",
2023
                 node.name)
2024

    
2025
    self.context.RemoveNode(node.name)
2026

    
2027
    result = self.rpc.call_node_leave_cluster(node.name)
2028
    msg = result.fail_msg
2029
    if msg:
2030
      self.LogWarning("Errors encountered on the remote node while leaving"
2031
                      " the cluster: %s", msg)
2032

    
2033
    # Promote nodes to master candidate as needed
2034
    _AdjustCandidatePool(self)
2035

    
2036

    
2037
class LUQueryNodes(NoHooksLU):
2038
  """Logical unit for querying nodes.
2039

2040
  """
2041
  _OP_REQP = ["output_fields", "names", "use_locking"]
2042
  REQ_BGL = False
2043
  _FIELDS_DYNAMIC = utils.FieldSet(
2044
    "dtotal", "dfree",
2045
    "mtotal", "mnode", "mfree",
2046
    "bootid",
2047
    "ctotal", "cnodes", "csockets",
2048
    )
2049

    
2050
  _FIELDS_STATIC = utils.FieldSet(
2051
    "name", "pinst_cnt", "sinst_cnt",
2052
    "pinst_list", "sinst_list",
2053
    "pip", "sip", "tags",
2054
    "serial_no",
2055
    "master_candidate",
2056
    "master",
2057
    "offline",
2058
    "drained",
2059
    "role",
2060
    )
2061

    
2062
  def ExpandNames(self):
2063
    _CheckOutputFields(static=self._FIELDS_STATIC,
2064
                       dynamic=self._FIELDS_DYNAMIC,
2065
                       selected=self.op.output_fields)
2066

    
2067
    self.needed_locks = {}
2068
    self.share_locks[locking.LEVEL_NODE] = 1
2069

    
2070
    if self.op.names:
2071
      self.wanted = _GetWantedNodes(self, self.op.names)
2072
    else:
2073
      self.wanted = locking.ALL_SET
2074

    
2075
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2076
    self.do_locking = self.do_node_query and self.op.use_locking
2077
    if self.do_locking:
2078
      # if we don't request only static fields, we need to lock the nodes
2079
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2080

    
2081

    
2082
  def CheckPrereq(self):
2083
    """Check prerequisites.
2084

2085
    """
2086
    # The validation of the node list is done in _GetWantedNodes,
2087
    # if non-empty; if empty, there's no validation to do
2088
    pass
2089

    
2090
  def Exec(self, feedback_fn):
2091
    """Computes the list of nodes and their attributes.
2092

2093
    """
2094
    all_info = self.cfg.GetAllNodesInfo()
2095
    if self.do_locking:
2096
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2097
    elif self.wanted != locking.ALL_SET:
2098
      nodenames = self.wanted
2099
      missing = set(nodenames).difference(all_info.keys())
2100
      if missing:
2101
        raise errors.OpExecError(
2102
          "Some nodes were removed before retrieving their data: %s" % missing)
2103
    else:
2104
      nodenames = all_info.keys()
2105

    
2106
    nodenames = utils.NiceSort(nodenames)
2107
    nodelist = [all_info[name] for name in nodenames]
2108

    
2109
    # begin data gathering
2110

    
2111
    if self.do_node_query:
2112
      live_data = {}
2113
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2114
                                          self.cfg.GetHypervisorType())
2115
      for name in nodenames:
2116
        nodeinfo = node_data[name]
2117
        if not nodeinfo.fail_msg and nodeinfo.payload:
2118
          nodeinfo = nodeinfo.payload
2119
          fn = utils.TryConvert
2120
          live_data[name] = {
2121
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2122
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2123
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2124
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2125
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2126
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2127
            "bootid": nodeinfo.get('bootid', None),
2128
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2129
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2130
            }
2131
        else:
2132
          live_data[name] = {}
2133
    else:
2134
      live_data = dict.fromkeys(nodenames, {})
2135

    
2136
    node_to_primary = dict([(name, set()) for name in nodenames])
2137
    node_to_secondary = dict([(name, set()) for name in nodenames])
2138

    
2139
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2140
                             "sinst_cnt", "sinst_list"))
2141
    if inst_fields & frozenset(self.op.output_fields):
2142
      instancelist = self.cfg.GetInstanceList()
2143

    
2144
      for instance_name in instancelist:
2145
        inst = self.cfg.GetInstanceInfo(instance_name)
2146
        if inst.primary_node in node_to_primary:
2147
          node_to_primary[inst.primary_node].add(inst.name)
2148
        for secnode in inst.secondary_nodes:
2149
          if secnode in node_to_secondary:
2150
            node_to_secondary[secnode].add(inst.name)
2151

    
2152
    master_node = self.cfg.GetMasterNode()
2153

    
2154
    # end data gathering
2155

    
2156
    output = []
2157
    for node in nodelist:
2158
      node_output = []
2159
      for field in self.op.output_fields:
2160
        if field == "name":
2161
          val = node.name
2162
        elif field == "pinst_list":
2163
          val = list(node_to_primary[node.name])
2164
        elif field == "sinst_list":
2165
          val = list(node_to_secondary[node.name])
2166
        elif field == "pinst_cnt":
2167
          val = len(node_to_primary[node.name])
2168
        elif field == "sinst_cnt":
2169
          val = len(node_to_secondary[node.name])
2170
        elif field == "pip":
2171
          val = node.primary_ip
2172
        elif field == "sip":
2173
          val = node.secondary_ip
2174
        elif field == "tags":
2175
          val = list(node.GetTags())
2176
        elif field == "serial_no":
2177
          val = node.serial_no
2178
        elif field == "master_candidate":
2179
          val = node.master_candidate
2180
        elif field == "master":
2181
          val = node.name == master_node
2182
        elif field == "offline":
2183
          val = node.offline
2184
        elif field == "drained":
2185
          val = node.drained
2186
        elif self._FIELDS_DYNAMIC.Matches(field):
2187
          val = live_data[node.name].get(field, None)
2188
        elif field == "role":
2189
          if node.name == master_node:
2190
            val = "M"
2191
          elif node.master_candidate:
2192
            val = "C"
2193
          elif node.drained:
2194
            val = "D"
2195
          elif node.offline:
2196
            val = "O"
2197
          else:
2198
            val = "R"
2199
        else:
2200
          raise errors.ParameterError(field)
2201
        node_output.append(val)
2202
      output.append(node_output)
2203

    
2204
    return output
2205

    
2206

    
2207
class LUQueryNodeVolumes(NoHooksLU):
2208
  """Logical unit for getting volumes on node(s).
2209

2210
  """
2211
  _OP_REQP = ["nodes", "output_fields"]
2212
  REQ_BGL = False
2213
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2214
  _FIELDS_STATIC = utils.FieldSet("node")
2215

    
2216
  def ExpandNames(self):
2217
    _CheckOutputFields(static=self._FIELDS_STATIC,
2218
                       dynamic=self._FIELDS_DYNAMIC,
2219
                       selected=self.op.output_fields)
2220

    
2221
    self.needed_locks = {}
2222
    self.share_locks[locking.LEVEL_NODE] = 1
2223
    if not self.op.nodes:
2224
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2225
    else:
2226
      self.needed_locks[locking.LEVEL_NODE] = \
2227
        _GetWantedNodes(self, self.op.nodes)
2228

    
2229
  def CheckPrereq(self):
2230
    """Check prerequisites.
2231

2232
    This checks that the fields required are valid output fields.
2233

2234
    """
2235
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2236

    
2237
  def Exec(self, feedback_fn):
2238
    """Computes the list of nodes and their attributes.
2239

2240
    """
2241
    nodenames = self.nodes
2242
    volumes = self.rpc.call_node_volumes(nodenames)
2243

    
2244
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2245
             in self.cfg.GetInstanceList()]
2246

    
2247
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2248

    
2249
    output = []
2250
    for node in nodenames:
2251
      nresult = volumes[node]
2252
      if nresult.offline:
2253
        continue
2254
      msg = nresult.fail_msg
2255
      if msg:
2256
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2257
        continue
2258

    
2259
      node_vols = nresult.payload[:]
2260
      node_vols.sort(key=lambda vol: vol['dev'])
2261

    
2262
      for vol in node_vols:
2263
        node_output = []
2264
        for field in self.op.output_fields:
2265
          if field == "node":
2266
            val = node
2267
          elif field == "phys":
2268
            val = vol['dev']
2269
          elif field == "vg":
2270
            val = vol['vg']
2271
          elif field == "name":
2272
            val = vol['name']
2273
          elif field == "size":
2274
            val = int(float(vol['size']))
2275
          elif field == "instance":
2276
            for inst in ilist:
2277
              if node not in lv_by_node[inst]:
2278
                continue
2279
              if vol['name'] in lv_by_node[inst][node]:
2280
                val = inst.name
2281
                break
2282
            else:
2283
              val = '-'
2284
          else:
2285
            raise errors.ParameterError(field)
2286
          node_output.append(str(val))
2287

    
2288
        output.append(node_output)
2289

    
2290
    return output
2291

    
2292

    
2293
class LUAddNode(LogicalUnit):
2294
  """Logical unit for adding node to the cluster.
2295

2296
  """
2297
  HPATH = "node-add"
2298
  HTYPE = constants.HTYPE_NODE
2299
  _OP_REQP = ["node_name"]
2300

    
2301
  def BuildHooksEnv(self):
2302
    """Build hooks env.
2303

2304
    This will run on all nodes before, and on all nodes + the new node after.
2305

2306
    """
2307
    env = {
2308
      "OP_TARGET": self.op.node_name,
2309
      "NODE_NAME": self.op.node_name,
2310
      "NODE_PIP": self.op.primary_ip,
2311
      "NODE_SIP": self.op.secondary_ip,
2312
      }
2313
    nodes_0 = self.cfg.GetNodeList()
2314
    nodes_1 = nodes_0 + [self.op.node_name, ]
2315
    return env, nodes_0, nodes_1
2316

    
2317
  def CheckPrereq(self):
2318
    """Check prerequisites.
2319

2320
    This checks:
2321
     - the new node is not already in the config
2322
     - it is resolvable
2323
     - its parameters (single/dual homed) matches the cluster
2324

2325
    Any errors are signaled by raising errors.OpPrereqError.
2326

2327
    """
2328
    node_name = self.op.node_name
2329
    cfg = self.cfg
2330

    
2331
    dns_data = utils.HostInfo(node_name)
2332

    
2333
    node = dns_data.name
2334
    primary_ip = self.op.primary_ip = dns_data.ip
2335
    secondary_ip = getattr(self.op, "secondary_ip", None)
2336
    if secondary_ip is None:
2337
      secondary_ip = primary_ip
2338
    if not utils.IsValidIP(secondary_ip):
2339
      raise errors.OpPrereqError("Invalid secondary IP given")
2340
    self.op.secondary_ip = secondary_ip
2341

    
2342
    node_list = cfg.GetNodeList()
2343
    if not self.op.readd and node in node_list:
2344
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2345
                                 node)
2346
    elif self.op.readd and node not in node_list:
2347
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2348

    
2349
    for existing_node_name in node_list:
2350
      existing_node = cfg.GetNodeInfo(existing_node_name)
2351

    
2352
      if self.op.readd and node == existing_node_name:
2353
        if (existing_node.primary_ip != primary_ip or
2354
            existing_node.secondary_ip != secondary_ip):
2355
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2356
                                     " address configuration as before")
2357
        continue
2358

    
2359
      if (existing_node.primary_ip == primary_ip or
2360
          existing_node.secondary_ip == primary_ip or
2361
          existing_node.primary_ip == secondary_ip or
2362
          existing_node.secondary_ip == secondary_ip):
2363
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2364
                                   " existing node %s" % existing_node.name)
2365

    
2366
    # check that the type of the node (single versus dual homed) is the
2367
    # same as for the master
2368
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2369
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2370
    newbie_singlehomed = secondary_ip == primary_ip
2371
    if master_singlehomed != newbie_singlehomed:
2372
      if master_singlehomed:
2373
        raise errors.OpPrereqError("The master has no private ip but the"
2374
                                   " new node has one")
2375
      else:
2376
        raise errors.OpPrereqError("The master has a private ip but the"
2377
                                   " new node doesn't have one")
2378

    
2379
    # checks reachability
2380
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2381
      raise errors.OpPrereqError("Node not reachable by ping")
2382

    
2383
    if not newbie_singlehomed:
2384
      # check reachability from my secondary ip to newbie's secondary ip
2385
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2386
                           source=myself.secondary_ip):
2387
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2388
                                   " based ping to noded port")
2389

    
2390
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2391
    if self.op.readd:
2392
      exceptions = [node]
2393
    else:
2394
      exceptions = []
2395
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2396
    # the new node will increase mc_max by one, so:
2397
    mc_max = min(mc_max + 1, cp_size)
2398
    self.master_candidate = mc_now < mc_max
2399

    
2400
    if self.op.readd:
2401
      self.new_node = self.cfg.GetNodeInfo(node)
2402
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2403
    else:
2404
      self.new_node = objects.Node(name=node,
2405
                                   primary_ip=primary_ip,
2406
                                   secondary_ip=secondary_ip,
2407
                                   master_candidate=self.master_candidate,
2408
                                   offline=False, drained=False)
2409

    
2410
  def Exec(self, feedback_fn):
2411
    """Adds the new node to the cluster.
2412

2413
    """
2414
    new_node = self.new_node
2415
    node = new_node.name
2416

    
2417
    # for re-adds, reset the offline/drained/master-candidate flags;
2418
    # we need to reset here, otherwise offline would prevent RPC calls
2419
    # later in the procedure; this also means that if the re-add
2420
    # fails, we are left with a non-offlined, broken node
2421
    if self.op.readd:
2422
      new_node.drained = new_node.offline = False
2423
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2424
      # if we demote the node, we do cleanup later in the procedure
2425
      new_node.master_candidate = self.master_candidate
2426

    
2427
    # notify the user about any possible mc promotion
2428
    if new_node.master_candidate:
2429
      self.LogInfo("Node will be a master candidate")
2430

    
2431
    # check connectivity
2432
    result = self.rpc.call_version([node])[node]
2433
    result.Raise("Can't get version information from node %s" % node)
2434
    if constants.PROTOCOL_VERSION == result.payload:
2435
      logging.info("Communication to node %s fine, sw version %s match",
2436
                   node, result.payload)
2437
    else:
2438
      raise errors.OpExecError("Version mismatch master version %s,"
2439
                               " node version %s" %
2440
                               (constants.PROTOCOL_VERSION, result.payload))
2441

    
2442
    # setup ssh on node
2443
    logging.info("Copy ssh key to node %s", node)
2444
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2445
    keyarray = []
2446
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2447
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2448
                priv_key, pub_key]
2449

    
2450
    for i in keyfiles:
2451
      f = open(i, 'r')
2452
      try:
2453
        keyarray.append(f.read())
2454
      finally:
2455
        f.close()
2456

    
2457
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2458
                                    keyarray[2],
2459
                                    keyarray[3], keyarray[4], keyarray[5])
2460
    result.Raise("Cannot transfer ssh keys to the new node")
2461

    
2462
    # Add node to our /etc/hosts, and add key to known_hosts
2463
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2464
      utils.AddHostToEtcHosts(new_node.name)
2465

    
2466
    if new_node.secondary_ip != new_node.primary_ip:
2467
      result = self.rpc.call_node_has_ip_address(new_node.name,
2468
                                                 new_node.secondary_ip)
2469
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2470
                   prereq=True)
2471
      if not result.payload:
2472
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2473
                                 " you gave (%s). Please fix and re-run this"
2474
                                 " command." % new_node.secondary_ip)
2475

    
2476
    node_verify_list = [self.cfg.GetMasterNode()]
2477
    node_verify_param = {
2478
      'nodelist': [node],
2479
      # TODO: do a node-net-test as well?
2480
    }
2481

    
2482
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2483
                                       self.cfg.GetClusterName())
2484
    for verifier in node_verify_list:
2485
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2486
      nl_payload = result[verifier].payload['nodelist']
2487
      if nl_payload:
2488
        for failed in nl_payload:
2489
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2490
                      (verifier, nl_payload[failed]))
2491
        raise errors.OpExecError("ssh/hostname verification failed.")
2492

    
2493
    if self.op.readd:
2494
      _RedistributeAncillaryFiles(self)
2495
      self.context.ReaddNode(new_node)
2496
      # make sure we redistribute the config
2497
      self.cfg.Update(new_node)
2498
      # and make sure the new node will not have old files around
2499
      if not new_node.master_candidate:
2500
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2501
        msg = result.fail_msg
2502
        if msg:
2503
          self.LogWarning("Node failed to demote itself from master"
2504
                          " candidate status: %s" % msg)
2505
    else:
2506
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2507
      self.context.AddNode(new_node)
2508

    
2509

    
2510
class LUSetNodeParams(LogicalUnit):
2511
  """Modifies the parameters of a node.
2512

2513
  """
2514
  HPATH = "node-modify"
2515
  HTYPE = constants.HTYPE_NODE
2516
  _OP_REQP = ["node_name"]
2517
  REQ_BGL = False
2518

    
2519
  def CheckArguments(self):
2520
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2521
    if node_name is None:
2522
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2523
    self.op.node_name = node_name
2524
    _CheckBooleanOpField(self.op, 'master_candidate')
2525
    _CheckBooleanOpField(self.op, 'offline')
2526
    _CheckBooleanOpField(self.op, 'drained')
2527
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2528
    if all_mods.count(None) == 3:
2529
      raise errors.OpPrereqError("Please pass at least one modification")
2530
    if all_mods.count(True) > 1:
2531
      raise errors.OpPrereqError("Can't set the node into more than one"
2532
                                 " state at the same time")
2533

    
2534
  def ExpandNames(self):
2535
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2536

    
2537
  def BuildHooksEnv(self):
2538
    """Build hooks env.
2539

2540
    This runs on the master node.
2541

2542
    """
2543
    env = {
2544
      "OP_TARGET": self.op.node_name,
2545
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2546
      "OFFLINE": str(self.op.offline),
2547
      "DRAINED": str(self.op.drained),
2548
      }
2549
    nl = [self.cfg.GetMasterNode(),
2550
          self.op.node_name]
2551
    return env, nl, nl
2552

    
2553
  def CheckPrereq(self):
2554
    """Check prerequisites.
2555

2556
    This checks the requested flag changes against the node's current state.
2557

2558
    """
2559
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2560

    
2561
    if ((self.op.master_candidate == False or self.op.offline == True or
2562
         self.op.drained == True) and node.master_candidate):
2563
      # we will demote the node from master_candidate
2564
      if self.op.node_name == self.cfg.GetMasterNode():
2565
        raise errors.OpPrereqError("The master node has to be a"
2566
                                   " master candidate, online and not drained")
2567
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2568
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2569
      if num_candidates <= cp_size:
2570
        msg = ("Not enough master candidates (desired"
2571
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2572
        if self.op.force:
2573
          self.LogWarning(msg)
2574
        else:
2575
          raise errors.OpPrereqError(msg)
2576

    
2577
    if (self.op.master_candidate == True and
2578
        ((node.offline and not self.op.offline == False) or
2579
         (node.drained and not self.op.drained == False))):
2580
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2581
                                 " to master_candidate" % node.name)
2582

    
2583
    return
2584

    
2585
  def Exec(self, feedback_fn):
2586
    """Modifies a node.
2587

2588
    """
2589
    node = self.node
2590

    
2591
    result = []
2592
    changed_mc = False
2593

    
2594
    if self.op.offline is not None:
2595
      node.offline = self.op.offline
2596
      result.append(("offline", str(self.op.offline)))
2597
      if self.op.offline == True:
2598
        if node.master_candidate:
2599
          node.master_candidate = False
2600
          changed_mc = True
2601
          result.append(("master_candidate", "auto-demotion due to offline"))
2602
        if node.drained:
2603
          node.drained = False
2604
          result.append(("drained", "clear drained status due to offline"))
2605

    
2606
    if self.op.master_candidate is not None:
2607
      node.master_candidate = self.op.master_candidate
2608
      changed_mc = True
2609
      result.append(("master_candidate", str(self.op.master_candidate)))
2610
      if self.op.master_candidate == False:
2611
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2612
        msg = rrc.fail_msg
2613
        if msg:
2614
          self.LogWarning("Node failed to demote itself: %s" % msg)
2615

    
2616
    if self.op.drained is not None:
2617
      node.drained = self.op.drained
2618
      result.append(("drained", str(self.op.drained)))
2619
      if self.op.drained == True:
2620
        if node.master_candidate:
2621
          node.master_candidate = False
2622
          changed_mc = True
2623
          result.append(("master_candidate", "auto-demotion due to drain"))
2624
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2625
          msg = rrc.fail_msg
2626
          if msg:
2627
            self.LogWarning("Node failed to demote itself: %s" % msg)
2628
        if node.offline:
2629
          node.offline = False
2630
          result.append(("offline", "clear offline status due to drain"))
2631

    
2632
    # this will trigger configuration file update, if needed
2633
    self.cfg.Update(node)
2634
    # this will trigger job queue propagation or cleanup
2635
    if changed_mc:
2636
      self.context.ReaddNode(node)
2637

    
2638
    return result
2639

    
2640

    
2641
class LUPowercycleNode(NoHooksLU):
2642
  """Powercycles a node.
2643

2644
  """
2645
  _OP_REQP = ["node_name", "force"]
2646
  REQ_BGL = False
2647

    
2648
  def CheckArguments(self):
2649
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2650
    if node_name is None:
2651
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2652
    self.op.node_name = node_name
2653
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2654
      raise errors.OpPrereqError("The node is the master and the force"
2655
                                 " parameter was not set")
2656

    
2657
  def ExpandNames(self):
2658
    """Locking for PowercycleNode.
2659

2660
    This is a last-resort option and shouldn't block on other
2661
    jobs. Therefore, we grab no locks.
2662

2663
    """
2664
    self.needed_locks = {}
2665

    
2666
  def CheckPrereq(self):
2667
    """Check prerequisites.
2668

2669
    This LU has no prereqs.
2670

2671
    """
2672
    pass
2673

    
2674
  def Exec(self, feedback_fn):
2675
    """Reboots a node.
2676

2677
    """
2678
    result = self.rpc.call_node_powercycle(self.op.node_name,
2679
                                           self.cfg.GetHypervisorType())
2680
    result.Raise("Failed to schedule the reboot")
2681
    return result.payload
2682

    
2683

    
2684
class LUQueryClusterInfo(NoHooksLU):
2685
  """Query cluster configuration.
2686

2687
  """
2688
  _OP_REQP = []
2689
  REQ_BGL = False
2690

    
2691
  def ExpandNames(self):
2692
    self.needed_locks = {}
2693

    
2694
  def CheckPrereq(self):
2695
    """No prerequsites needed for this LU.
2696

2697
    """
2698
    pass
2699

    
2700
  def Exec(self, feedback_fn):
2701
    """Return cluster config.
2702

2703
    """
2704
    cluster = self.cfg.GetClusterInfo()
2705
    result = {
2706
      "software_version": constants.RELEASE_VERSION,
2707
      "protocol_version": constants.PROTOCOL_VERSION,
2708
      "config_version": constants.CONFIG_VERSION,
2709
      "os_api_version": max(constants.OS_API_VERSIONS),
2710
      "export_version": constants.EXPORT_VERSION,
2711
      "architecture": (platform.architecture()[0], platform.machine()),
2712
      "name": cluster.cluster_name,
2713
      "master": cluster.master_node,
2714
      "default_hypervisor": cluster.enabled_hypervisors[0],
2715
      "enabled_hypervisors": cluster.enabled_hypervisors,
2716
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
2717
                        for hypervisor_name in cluster.enabled_hypervisors]),
2718
      "beparams": cluster.beparams,
2719
      "nicparams": cluster.nicparams,
2720
      "candidate_pool_size": cluster.candidate_pool_size,
2721
      "master_netdev": cluster.master_netdev,
2722
      "volume_group_name": cluster.volume_group_name,
2723
      "file_storage_dir": cluster.file_storage_dir,
2724
      }
2725

    
2726
    return result
2727

    
2728

    
2729
class LUQueryConfigValues(NoHooksLU):
2730
  """Return configuration values.
2731

2732
  """
2733
  _OP_REQP = []
2734
  REQ_BGL = False
2735
  _FIELDS_DYNAMIC = utils.FieldSet()
2736
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2737

    
2738
  def ExpandNames(self):
2739
    self.needed_locks = {}
2740

    
2741
    _CheckOutputFields(static=self._FIELDS_STATIC,
2742
                       dynamic=self._FIELDS_DYNAMIC,
2743
                       selected=self.op.output_fields)
2744

    
2745
  def CheckPrereq(self):
2746
    """No prerequisites.
2747

2748
    """
2749
    pass
2750

    
2751
  def Exec(self, feedback_fn):
2752
    """Dump a representation of the cluster config to the standard output.
2753

2754
    """
2755
    values = []
2756
    for field in self.op.output_fields:
2757
      if field == "cluster_name":
2758
        entry = self.cfg.GetClusterName()
2759
      elif field == "master_node":
2760
        entry = self.cfg.GetMasterNode()
2761
      elif field == "drain_flag":
2762
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2763
      else:
2764
        raise errors.ParameterError(field)
2765
      values.append(entry)
2766
    return values
2767

    
2768

    
2769
class LUActivateInstanceDisks(NoHooksLU):
2770
  """Bring up an instance's disks.
2771

2772
  """
2773
  _OP_REQP = ["instance_name"]
2774
  REQ_BGL = False
2775

    
2776
  def ExpandNames(self):
2777
    self._ExpandAndLockInstance()
2778
    self.needed_locks[locking.LEVEL_NODE] = []
2779
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2780

    
2781
  def DeclareLocks(self, level):
2782
    if level == locking.LEVEL_NODE:
2783
      self._LockInstancesNodes()
2784

    
2785
  def CheckPrereq(self):
2786
    """Check prerequisites.
2787

2788
    This checks that the instance is in the cluster.
2789

2790
    """
2791
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2792
    assert self.instance is not None, \
2793
      "Cannot retrieve locked instance %s" % self.op.instance_name
2794
    _CheckNodeOnline(self, self.instance.primary_node)
2795

    
2796
  def Exec(self, feedback_fn):
2797
    """Activate the disks.
2798

2799
    """
2800
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2801
    if not disks_ok:
2802
      raise errors.OpExecError("Cannot activate block devices")
2803

    
2804
    return disks_info
2805

    
2806

    
2807
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2808
  """Prepare the block devices for an instance.
2809

2810
  This sets up the block devices on all nodes.
2811

2812
  @type lu: L{LogicalUnit}
2813
  @param lu: the logical unit on whose behalf we execute
2814
  @type instance: L{objects.Instance}
2815
  @param instance: the instance for whose disks we assemble
2816
  @type ignore_secondaries: boolean
2817
  @param ignore_secondaries: if true, errors on secondary nodes
2818
      won't result in an error return from the function
2819
  @return: a pair (disks_ok, device_info), where device_info is a list of
2820
      (host, instance_visible_name, node_visible_name)
2821
      with the mapping from node devices to instance devices
2822

2823
  """
2824
  device_info = []
2825
  disks_ok = True
2826
  iname = instance.name
2827
  # With the two passes mechanism we try to reduce the window of
2828
  # opportunity for the race condition of switching DRBD to primary
2829
  # before handshaking occurred, but we do not eliminate it
2830

    
2831
  # The proper fix would be to wait (with some limits) until the
2832
  # connection has been made and drbd transitions from WFConnection
2833
  # into any other network-connected state (Connected, SyncTarget,
2834
  # SyncSource, etc.)
2835

    
2836
  # 1st pass, assemble on all nodes in secondary mode
2837
  for inst_disk in instance.disks:
2838
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2839
      lu.cfg.SetDiskID(node_disk, node)
2840
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2841
      msg = result.fail_msg
2842
      if msg:
2843
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2844
                           " (is_primary=False, pass=1): %s",
2845
                           inst_disk.iv_name, node, msg)
2846
        if not ignore_secondaries:
2847
          disks_ok = False
2848

    
2849
  # FIXME: race condition on drbd migration to primary
2850

    
2851
  # 2nd pass, do only the primary node
2852
  for inst_disk in instance.disks:
2853
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2854
      if node != instance.primary_node:
2855
        continue
2856
      lu.cfg.SetDiskID(node_disk, node)
2857
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2858
      msg = result.fail_msg
2859
      if msg:
2860
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2861
                           " (is_primary=True, pass=2): %s",
2862
                           inst_disk.iv_name, node, msg)
2863
        disks_ok = False
2864
    device_info.append((instance.primary_node, inst_disk.iv_name,
2865
                        result.payload))
2866

    
2867
  # leave the disks configured for the primary node
2868
  # this is a workaround that would be fixed better by
2869
  # improving the logical/physical id handling
2870
  for disk in instance.disks:
2871
    lu.cfg.SetDiskID(disk, instance.primary_node)
2872

    
2873
  return disks_ok, device_info
2874

    
2875

    
2876
def _StartInstanceDisks(lu, instance, force):
2877
  """Start the disks of an instance.
2878

2879
  """
2880
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
2881
                                           ignore_secondaries=force)
2882
  if not disks_ok:
2883
    _ShutdownInstanceDisks(lu, instance)
2884
    if force is not None and not force:
2885
      lu.proc.LogWarning("", hint="If the message above refers to a"
2886
                         " secondary node,"
2887
                         " you can retry the operation using '--force'.")
2888
    raise errors.OpExecError("Disk consistency error")
2889

    
2890

    
2891
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s" % pnode)

  if instance.name in ins_l.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)

def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored (they make the result False); if it is true, such errors
  are ignored.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem))
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem))

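# Illustrative sketch only (not called by any LU): a typical use of
# _CheckNodeFreeMemory, mirroring the callers further down in this file
# (LUStartupInstance, LUFailoverInstance, TLMigrateInstance).  'lu' is
# assumed to be a LogicalUnit with its configuration attached and 'instance'
# an objects.Instance; the requested amount is the instance's effective
# BE_MEMORY value.
def _ExampleFreeMemoryCheck(lu, instance):
  """Sketch: verify the primary node can hold the instance's memory."""
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)
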
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ))
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ))

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)

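# Illustrative sketch only: the parameter layering used by
# LUStartupInstance.CheckPrereq above when validating extra hvparams.
# The dictionaries below are made-up example values; the sketch assumes, as
# the code above suggests, that objects.FillDict returns a copy of its first
# argument updated with the second.
def _ExampleHVParamLayering():
  """Sketch: cluster defaults -> instance overrides -> per-start overrides."""
  cluster_defaults = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
  instance_overrides = {"root_path": "/dev/xvda1"}
  per_start_overrides = {"kernel_path": "/boot/vmlinuz-test"}
  filled_hvp = objects.FillDict(cluster_defaults, instance_overrides)
  filled_hvp.update(per_start_overrides)
  # result: {"kernel_path": "/boot/vmlinuz-test", "root_path": "/dev/xvda1"}
  return filled_hvp
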
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)

class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)

class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   self.op.pnode)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name), prereq=True)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)

class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name

class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "serial_no", "hypervisor", "hvparams",] +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.failed or result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "serial_no":
          val = instance.serial_no
        elif field == "network_port":
          val = instance.network_port
        elif field == "hypervisor":
          val = instance.hypervisor
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output

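# Illustrative sketch only: how LUQueryInstances.Exec above resolves an
# indexed field such as "nic.mac/0".  It assumes, as the usage above
# suggests, that utils.FieldSet.Matches returns a regex match object (or a
# false value) whose groups() are the parenthesised parts of the declared
# field pattern.
def _ExampleFieldMatch():
  """Sketch: decompose "nic.mac/0" into its (kind, attribute, index) parts."""
  fields = utils.FieldSet(r"(nic)\.(mac|ip|mode|link)/([0-9]+)")
  st_match = fields.Matches("nic.mac/0")
  if st_match and st_match.groups():
    kind, attr, idx = st_match.groups()
    # kind == "nic", attr == "mac", idx == "0"; Exec would then pick
    # instance.nics[int(idx)].mac
    return kind, attr, int(idx)
  return None
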
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                               ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets.append(self._migrater)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return env, nl, nl

class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()

def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


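# Usage sketch (hypothetical node name and objects): _CreateBlockDev walks the
# disk tree depth-first, creating children before their parent, and it flips
# force_create to True as soon as a device in the tree reports
# CreateOnSecondary().  So a call such as
#   _CreateBlockDev(lu, "node2.example.com", instance, drbd_dev,
#                   False, info, False)
# on a secondary node still creates the parts of the tree that declare they
# need creation there, while skipping the rest.

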
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate one logical volume name per given extension, each
  based on a freshly generated unique ID.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


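# Usage sketch (hypothetical IDs shown): each extension gets its own freshly
# generated unique ID, so
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
# returns something like
#   ["6a2f...-uuid.disk0_data", "91c4...-uuid.disk0_meta"]
# with two different IDs rather than a shared prefix.

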
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


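# Shape of the returned object (illustrative): an LD_DRBD8 disk of the
# requested size whose logical_id is the tuple
#   (primary, secondary, port, p_minor, s_minor, shared_secret)
# and whose children are two LD_LV disks: names[0] for the data LV (full
# size) and names[1] for a 128 MB metadata LV.

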
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


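# Example (hypothetical input): for template DT_PLAIN, base_index=0 and
#   disk_info = [{"size": 1024, "mode": "rw"}]
# the function returns a single LD_LV disk named "<uuid>.disk0" in the
# cluster volume group, with iv_name "disk/0"; for DT_DRBD8 the same input
# additionally allocates two DRBD minors and a data/meta LV pair per disk.

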
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


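# Note on the creation loop above: every disk is offered to every node of the
# instance, but force_create/force_open (f_create) are only True on the
# primary node; _CreateBlockDev then decides per device type whether the
# secondary still needs its part of the tree created.

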
def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    msg = result.fail_msg
    if msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk template and disk sizes
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


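# Worked example (hypothetical sizes): for DT_DRBD8 with
#   disks = [{"size": 512}, {"size": 1024}]
# the requirement is (512 + 128) + (1024 + 128) = 1792 MB of free space in
# the volume group on each involved node; DT_FILE and DT_DISKLESS return
# None, meaning no volume-group space check applies.

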
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets.append(self.replacer)

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      self.tasklets.append(replacer)

    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    cnt = [remote_node, iallocator].count(None)
    if mode == constants.REPLACE_DISK_CHG:
      if cnt == 2:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")
      elif cnt == 0:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
    else: # not replacing the secondary
      if cnt != 2:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " be used only when changing the"
                                   " secondary node")

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info))

    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if self.instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(self.instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(self.instance.secondary_nodes))

    secondary_node = self.instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       self.instance.name, secondary_node)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.mode == constants.REPLACE_DISK_PRI:
      self.target_node = self.instance.primary_node
      self.other_node = secondary_node
      check_nodes = [self.target_node, self.other_node]

    elif self.mode == constants.REPLACE_DISK_SEC:
      self.target_node = secondary_node
      self.other_node = self.instance.primary_node
      check_nodes = [self.target_node, self.other_node]

    elif self.mode == constants.REPLACE_DISK_CHG:
      self.new_node = remote_node
      self.other_node = self.instance.primary_node
      self.target_node = secondary_node
      check_nodes = [self.new_node, self.other_node]

      _CheckNodeNotDrained(self.lu, remote_node)

    else:
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                   self.mode)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    # Check whether disks are valid
    for disk_idx in self.disks:
      self.instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
5536
    """Execute disk replacement.
5537

5538
    This dispatches the disk replacement to the appropriate handler.
5539

5540
    """
5541
    feedback_fn("Replacing disks for %s" % self.instance.name)
5542

    
5543
    activate_disks = (not self.instance.admin_up)
5544

    
5545
    # Activate the instance disks if we're replacing them on a down instance
5546
    if activate_disks:
5547
      _StartInstanceDisks(self.lu, self.instance, True)
5548

    
5549
    try:
5550
      if self.mode == constants.REPLACE_DISK_CHG:
5551
        return self._ExecDrbd8Secondary()
5552
      else:
5553
        return self._ExecDrbd8DiskOnly()
5554

    
5555
    finally:
5556
      # Deactivate the instance disks if we're replacing them on a down instance
5557
      if activate_disks:
5558
        _SafeShutdownInstanceDisks(self.lu, self.instance)
5559

    
5560
  def _CheckVolumeGroup(self, nodes):
5561
    self.lu.LogInfo("Checking volume groups")
5562

    
5563
    vgname = self.cfg.GetVGName()
5564

    
5565
    # Make sure volume group exists on all involved nodes
5566
    results = self.rpc.call_vg_list(nodes)
5567
    if not results:
5568
      raise errors.OpExecError("Can't list volume groups on the nodes")
5569

    
5570
    for node in nodes:
5571
      res = results[node]
5572
      res.Raise("Error checking node %s" % node)
5573
      if vgname not in res.payload:
5574
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
5575
                                 (vgname, node))
5576

    
5577
  def _CheckDisksExistence(self, nodes):
5578
    # Check disk existence
5579
    for idx, dev in enumerate(self.instance.disks):
5580
      if idx not in self.disks:
5581
        continue
5582

    
5583
      for node in nodes:
5584
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
5585
        self.cfg.SetDiskID(dev, node)
5586

    
5587
        result = self.rpc.call_blockdev_find(node, dev)
5588

    
5589
        msg = result.fail_msg
5590
        if msg or not result.payload:
5591
          if not msg:
5592
            msg = "disk not found"
5593
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5594
                                   (idx, node, msg))
5595

    
5596
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
5597
    for idx, dev in enumerate(self.instance.disks):
5598
      if idx not in self.disks:
5599
        continue
5600

    
5601
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
5602
                      (idx, node_name))
5603

    
5604
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
5605
                                   ldisk=ldisk):
5606
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
5607
                                 " replace disks for instance %s" %
5608
                                 (node_name, self.instance.name))
5609

    
5610
  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload[5]:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (dev, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)

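  # Illustrative note (not part of the original module): the detach/rename/
  # attach dance in step 4 above can be followed on a single, made-up
  # example.  Assuming disk/0 is backed by the LVs "xenvg/abc.disk0_data"
  # and "xenvg/abc.disk0_meta", and the freshly created LVs are
  # "xenvg/uuid1.disk0_data" and "xenvg/uuid1.disk0_meta":
  #
  #   1. blockdev_removechildren: the DRBD device drops its local backing
  #      storage and keeps serving data from its peer over the network
  #   2. blockdev_rename: abc.disk0_data  -> abc.disk0_data_replaced-<ts>
  #                       abc.disk0_meta  -> abc.disk0_meta_replaced-<ts>
  #   3. blockdev_rename: uuid1.disk0_data -> abc.disk0_data
  #                       uuid1.disk0_meta -> abc.disk0_meta
  #   4. blockdev_addchildren: the DRBD device is re-attached to LVs that
  #      carry the old names but are the new, empty volumes, and resyncs
  #
  # The LV names here are invented for the example; only the ordering
  # mirrors the code above.
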
  def _ExecDrbd8Secondary(self):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r" % (minors,))

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net(
      [self.instance.primary_node], self.node_secondary_ip,
      self.instance.disks)[self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net(
      [self.instance.primary_node, self.new_node], self.node_secondary_ip,
      self.instance.disks, self.instance.name, False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)


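# Illustrative sketch (not used by any LU above): how a DRBD8 logical_id
# tuple is rewritten when the secondary node is replaced, mirroring the
# new_alone_id/new_net_id construction in _ExecDrbd8Secondary.  The function
# name and the standalone/networked split are examples only, not part of the
# Ganeti API.
def _ExampleNewDrbdLogicalIds(old_logical_id, primary_node, new_node,
                              new_minor):
  """Return (standalone_id, networked_id) for a replaced secondary.

  Self-contained illustration: logical_id is assumed to be the usual
  6-tuple (node_a, node_b, port, minor_a, minor_b, secret).

  """
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_logical_id
  # keep the minor belonging to the primary node, whichever slot it is in
  if primary_node == o_node1:
    p_minor = o_minor1
  else:
    p_minor = o_minor2
  # without a port the device can be brought up standalone on the new node
  standalone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
  # with the original port it can later be network-attached to the primary
  networked_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
  return standalone_id, networked_id

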
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node)
      vg_free = info.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node, vg_free, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


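# Illustrative sketch (not called by LUGrowDisk): the free-space check done
# in LUGrowDisk.CheckPrereq, reduced to plain data.  The helper name and the
# shape of the vg_free mapping are assumptions made for the example only.
def _ExampleCheckGrowSpace(vg_free_by_node, amount):
  """Return the nodes that cannot accommodate a grow of 'amount' MiB.

  @param vg_free_by_node: dict mapping node name to free VG space in MiB
  @param amount: requested growth in MiB

  """
  short_nodes = []
  for node, vg_free in vg_free_by_node.items():
    # mirror the prereq check: non-integer data is an error there, here we
    # simply flag the node together with the ones lacking space
    if not isinstance(vg_free, int) or amount > vg_free:
      short_nodes.append(node)
  return short_nodes

# e.g. _ExampleCheckGrowSpace({"node1": 2048, "node2": 512}, 1024) == ["node2"]

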
class LUQueryInstanceData(NoHooksLU):
6014
  """Query runtime instance data.
6015

6016
  """
6017
  _OP_REQP = ["instances", "static"]
6018
  REQ_BGL = False
6019

    
6020
  def ExpandNames(self):
6021
    self.needed_locks = {}
6022
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6023

    
6024
    if not isinstance(self.op.instances, list):
6025
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6026

    
6027
    if self.op.instances:
6028
      self.wanted_names = []
6029
      for name in self.op.instances:
6030
        full_name = self.cfg.ExpandInstanceName(name)
6031
        if full_name is None:
6032
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6033
        self.wanted_names.append(full_name)
6034
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6035
    else:
6036
      self.wanted_names = None
6037
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6038

    
6039
    self.needed_locks[locking.LEVEL_NODE] = []
6040
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6041

    
6042
  def DeclareLocks(self, level):
6043
    if level == locking.LEVEL_NODE:
6044
      self._LockInstancesNodes()
6045

    
6046
  def CheckPrereq(self):
6047
    """Check prerequisites.
6048

6049
    This only checks the optional instance list against the existing names.
6050

6051
    """
6052
    if self.wanted_names is None:
6053
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6054

    
6055
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6056
                             in self.wanted_names]
6057
    return
6058

    
6059
  def _ComputeDiskStatus(self, instance, snode, dev):
6060
    """Compute block device status.
6061

6062
    """
6063
    static = self.op.static
6064
    if not static:
6065
      self.cfg.SetDiskID(dev, instance.primary_node)
6066
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
6067
      if dev_pstatus.offline:
6068
        dev_pstatus = None
6069
      else:
6070
        dev_pstatus.Raise("Can't compute disk status for %s" % instance.name)
6071
        dev_pstatus = dev_pstatus.payload
6072
    else:
6073
      dev_pstatus = None
6074

    
6075
    if dev.dev_type in constants.LDS_DRBD:
6076
      # we change the snode then (otherwise we use the one passed in)
6077
      if dev.logical_id[0] == instance.primary_node:
6078
        snode = dev.logical_id[1]
6079
      else:
6080
        snode = dev.logical_id[0]
6081

    
6082
    if snode and not static:
6083
      self.cfg.SetDiskID(dev, snode)
6084
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
6085
      if dev_sstatus.offline:
6086
        dev_sstatus = None
6087
      else:
6088
        dev_sstatus.Raise("Can't compute disk status for %s" % instance.name)
6089
        dev_sstatus = dev_sstatus.payload
6090
    else:
6091
      dev_sstatus = None
6092

    
6093
    if dev.children:
6094
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
6095
                      for child in dev.children]
6096
    else:
6097
      dev_children = []
6098

    
6099
    data = {
6100
      "iv_name": dev.iv_name,
6101
      "dev_type": dev.dev_type,
6102
      "logical_id": dev.logical_id,
6103
      "physical_id": dev.physical_id,
6104
      "pstatus": dev_pstatus,
6105
      "sstatus": dev_sstatus,
6106
      "children": dev_children,
6107
      "mode": dev.mode,
6108
      "size": dev.size,
6109
      }
6110

    
6111
    return data
6112
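  # Illustrative note (values invented for the example): for a DRBD8 disk
  # with two LV children, _ComputeDiskStatus returns a nested structure of
  # the shape
  #
  #   {"iv_name": "disk/0", "dev_type": "drbd8",
  #    "logical_id": (...), "physical_id": (...),
  #    "pstatus": <blockdev_find payload or None>,
  #    "sstatus": <blockdev_find payload or None>,
  #    "mode": "rw", "size": 10240,
  #    "children": [<same structure for the data LV>,
  #                 <same structure for the meta LV>]}
  #
  # i.e. the method recurses through dev.children so the whole disk tree is
  # reported, with the per-node status queries skipped when static=True.
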

    
6113
  def Exec(self, feedback_fn):
6114
    """Gather and return data"""
6115
    result = {}
6116

    
6117
    cluster = self.cfg.GetClusterInfo()
6118

    
6119
    for instance in self.wanted_instances:
6120
      if not self.op.static:
6121
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6122
                                                  instance.name,
6123
                                                  instance.hypervisor)
6124
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6125
        remote_info = remote_info.payload
6126
        if remote_info and "state" in remote_info:
6127
          remote_state = "up"
6128
        else:
6129
          remote_state = "down"
6130
      else:
6131
        remote_state = None
6132
      if instance.admin_up:
6133
        config_state = "up"
6134
      else:
6135
        config_state = "down"
6136

    
6137
      disks = [self._ComputeDiskStatus(instance, None, device)
6138
               for device in instance.disks]
6139

    
6140
      idict = {
6141
        "name": instance.name,
6142
        "config_state": config_state,
6143
        "run_state": remote_state,
6144
        "pnode": instance.primary_node,
6145
        "snodes": instance.secondary_nodes,
6146
        "os": instance.os,
6147
        # this happens to be the same format used for hooks
6148
        "nics": _NICListToTuple(self, instance.nics),
6149
        "disks": disks,
6150
        "hypervisor": instance.hypervisor,
6151
        "network_port": instance.network_port,
6152
        "hv_instance": instance.hvparams,
6153
        "hv_actual": cluster.FillHV(instance),
6154
        "be_instance": instance.beparams,
6155
        "be_actual": cluster.FillBE(instance),
6156
        }
6157

    
6158
      result[instance.name] = idict
6159

    
6160
    return result
6161

    
6162

    
6163
class LUSetInstanceParams(LogicalUnit):
6164
  """Modifies an instances's parameters.
6165

6166
  """
6167
  HPATH = "instance-modify"
6168
  HTYPE = constants.HTYPE_INSTANCE
6169
  _OP_REQP = ["instance_name"]
6170
  REQ_BGL = False
6171

    
6172
  def CheckArguments(self):
6173
    if not hasattr(self.op, 'nics'):
6174
      self.op.nics = []
6175
    if not hasattr(self.op, 'disks'):
6176
      self.op.disks = []
6177
    if not hasattr(self.op, 'beparams'):
6178
      self.op.beparams = {}
6179
    if not hasattr(self.op, 'hvparams'):
6180
      self.op.hvparams = {}
6181
    self.op.force = getattr(self.op, "force", False)
6182
    if not (self.op.nics or self.op.disks or
6183
            self.op.hvparams or self.op.beparams):
6184
      raise errors.OpPrereqError("No changes submitted")
6185

    
6186
    # Disk validation
6187
    disk_addremove = 0
6188
    for disk_op, disk_dict in self.op.disks:
6189
      if disk_op == constants.DDM_REMOVE:
6190
        disk_addremove += 1
6191
        continue
6192
      elif disk_op == constants.DDM_ADD:
6193
        disk_addremove += 1
6194
      else:
6195
        if not isinstance(disk_op, int):
6196
          raise errors.OpPrereqError("Invalid disk index")
6197
        if not isinstance(disk_dict, dict):
6198
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
6199
          raise errors.OpPrereqError(msg)
6200

    
6201
      if disk_op == constants.DDM_ADD:
6202
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
6203
        if mode not in constants.DISK_ACCESS_SET:
6204
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
6205
        size = disk_dict.get('size', None)
6206
        if size is None:
6207
          raise errors.OpPrereqError("Required disk parameter size missing")
6208
        try:
6209
          size = int(size)
6210
        except ValueError, err:
6211
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
6212
                                     str(err))
6213
        disk_dict['size'] = size
6214
      else:
6215
        # modification of disk
6216
        if 'size' in disk_dict:
6217
          raise errors.OpPrereqError("Disk size change not possible, use"
6218
                                     " grow-disk")
6219

    
6220
    if disk_addremove > 1:
6221
      raise errors.OpPrereqError("Only one disk add or remove operation"
6222
                                 " supported at a time")
6223

    
6224
    # NIC validation
6225
    nic_addremove = 0
6226
    for nic_op, nic_dict in self.op.nics:
6227
      if nic_op == constants.DDM_REMOVE:
6228
        nic_addremove += 1
6229
        continue
6230
      elif nic_op == constants.DDM_ADD:
6231
        nic_addremove += 1
6232
      else:
6233
        if not isinstance(nic_op, int):
6234
          raise errors.OpPrereqError("Invalid nic index")
6235
        if not isinstance(nic_dict, dict):
6236
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
6237
          raise errors.OpPrereqError(msg)
6238

    
6239
      # nic_dict should be a dict
6240
      nic_ip = nic_dict.get('ip', None)
6241
      if nic_ip is not None:
6242
        if nic_ip.lower() == constants.VALUE_NONE:
6243
          nic_dict['ip'] = None
6244
        else:
6245
          if not utils.IsValidIP(nic_ip):
6246
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
6247

    
6248
      nic_bridge = nic_dict.get('bridge', None)
6249
      nic_link = nic_dict.get('link', None)
6250
      if nic_bridge and nic_link:
6251
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6252
                                   " at the same time")
6253
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
6254
        nic_dict['bridge'] = None
6255
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
6256
        nic_dict['link'] = None
6257

    
6258
      if nic_op == constants.DDM_ADD:
6259
        nic_mac = nic_dict.get('mac', None)
6260
        if nic_mac is None:
6261
          nic_dict['mac'] = constants.VALUE_AUTO
6262

    
6263
      if 'mac' in nic_dict:
6264
        nic_mac = nic_dict['mac']
6265
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6266
          if not utils.IsValidMac(nic_mac):
6267
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
6268
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
6269
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
6270
                                     " modifying an existing nic")
6271

    
6272
    if nic_addremove > 1:
6273
      raise errors.OpPrereqError("Only one NIC add or remove operation"
6274
                                 " supported at a time")
6275
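  # Illustrative note: self.op.disks and self.op.nics are lists of
  # (operation, parameters) pairs, where the operation is either
  # constants.DDM_ADD, constants.DDM_REMOVE or the integer index of an
  # existing device.  For example (values made up):
  #
  #   op.disks = [(constants.DDM_ADD, {"size": 1024, "mode": "rw"})]
  #   op.nics  = [(0, {"ip": "none", "bridge": "br0"})]
  #
  # would add a 1 GiB disk and change NIC 0; CheckArguments above only
  # validates the shape of these pairs, the actual changes happen in Exec.
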

    
6276
  def ExpandNames(self):
6277
    self._ExpandAndLockInstance()
6278
    self.needed_locks[locking.LEVEL_NODE] = []
6279
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6280

    
6281
  def DeclareLocks(self, level):
6282
    if level == locking.LEVEL_NODE:
6283
      self._LockInstancesNodes()
6284

    
6285
  def BuildHooksEnv(self):
6286
    """Build hooks env.
6287

6288
    This runs on the master, primary and secondaries.
6289

6290
    """
6291
    args = dict()
6292
    if constants.BE_MEMORY in self.be_new:
6293
      args['memory'] = self.be_new[constants.BE_MEMORY]
6294
    if constants.BE_VCPUS in self.be_new:
6295
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
6296
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
6297
    # information at all.
6298
    if self.op.nics:
6299
      args['nics'] = []
6300
      nic_override = dict(self.op.nics)
6301
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
6302
      for idx, nic in enumerate(self.instance.nics):
6303
        if idx in nic_override:
6304
          this_nic_override = nic_override[idx]
6305
        else:
6306
          this_nic_override = {}
6307
        if 'ip' in this_nic_override:
6308
          ip = this_nic_override['ip']
6309
        else:
6310
          ip = nic.ip
6311
        if 'mac' in this_nic_override:
6312
          mac = this_nic_override['mac']
6313
        else:
6314
          mac = nic.mac
6315
        if idx in self.nic_pnew:
6316
          nicparams = self.nic_pnew[idx]
6317
        else:
6318
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6319
        mode = nicparams[constants.NIC_MODE]
6320
        link = nicparams[constants.NIC_LINK]
6321
        args['nics'].append((ip, mac, mode, link))
6322
      if constants.DDM_ADD in nic_override:
6323
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6324
        mac = nic_override[constants.DDM_ADD]['mac']
6325
        nicparams = self.nic_pnew[constants.DDM_ADD]
6326
        mode = nicparams[constants.NIC_MODE]
6327
        link = nicparams[constants.NIC_LINK]
6328
        args['nics'].append((ip, mac, mode, link))
6329
      elif constants.DDM_REMOVE in nic_override:
6330
        del args['nics'][-1]
6331

    
6332
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6333
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6334
    return env, nl, nl
6335

    
6336
  def _GetUpdatedParams(self, old_params, update_dict,
6337
                        default_values, parameter_types):
6338
    """Return the new params dict for the given params.
6339

6340
    @type old_params: dict
6341
    @param old_params: old parameters
6342
    @type update_dict: dict
6343
    @param update_dict: dict containing new parameter values,
6344
                        or constants.VALUE_DEFAULT to reset the
6345
                        parameter to its default value
6346
    @type default_values: dict
6347
    @param default_values: default values for the filled parameters
6348
    @type parameter_types: dict
6349
    @param parameter_types: dict mapping target dict keys to types
6350
                            in constants.ENFORCEABLE_TYPES
6351
    @rtype: (dict, dict)
6352
    @return: (new_parameters, filled_parameters)
6353

6354
    """
6355
    params_copy = copy.deepcopy(old_params)
6356
    for key, val in update_dict.iteritems():
6357
      if val == constants.VALUE_DEFAULT:
6358
        try:
6359
          del params_copy[key]
6360
        except KeyError:
6361
          pass
6362
      else:
6363
        params_copy[key] = val
6364
    utils.ForceDictType(params_copy, parameter_types)
6365
    params_filled = objects.FillDict(default_values, params_copy)
6366
    return (params_copy, params_filled)
6367
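  # Illustrative note (names and values invented for the example): with
  #
  #   old_params     = {"kernel_path": "/boot/vmlinuz-2.6-xenU"}
  #   update_dict    = {"kernel_path": constants.VALUE_DEFAULT,
  #                     "initrd_path": "/boot/initrd-2.6-xenU"}
  #   default_values = {"kernel_path": "/boot/vmlinuz", "initrd_path": ""}
  #
  # _GetUpdatedParams returns
  #
  #   new_parameters    = {"initrd_path": "/boot/initrd-2.6-xenU"}
  #   filled_parameters = {"kernel_path": "/boot/vmlinuz",
  #                        "initrd_path": "/boot/initrd-2.6-xenU"}
  #
  # i.e. VALUE_DEFAULT drops the key from the stored dict, and the filled
  # dict is the stored dict layered on top of the defaults (the type
  # enforcement via utils.ForceDictType is left out of the example).
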

    
6368
  def CheckPrereq(self):
6369
    """Check prerequisites.
6370

6371
    This only checks the instance list against the existing names.
6372

6373
    """
6374
    self.force = self.op.force
6375

    
6376
    # checking the new params on the primary/secondary nodes
6377

    
6378
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6379
    cluster = self.cluster = self.cfg.GetClusterInfo()
6380
    assert self.instance is not None, \
6381
      "Cannot retrieve locked instance %s" % self.op.instance_name
6382
    pnode = instance.primary_node
6383
    nodelist = list(instance.all_nodes)
6384

    
6385
    # hvparams processing
6386
    if self.op.hvparams:
6387
      i_hvdict, hv_new = self._GetUpdatedParams(
6388
                             instance.hvparams, self.op.hvparams,
6389
                             cluster.hvparams[instance.hypervisor],
6390
                             constants.HVS_PARAMETER_TYPES)
6391
      # local check
6392
      hypervisor.GetHypervisor(
6393
        instance.hypervisor).CheckParameterSyntax(hv_new)
6394
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6395
      self.hv_new = hv_new # the new actual values
6396
      self.hv_inst = i_hvdict # the new dict (without defaults)
6397
    else:
6398
      self.hv_new = self.hv_inst = {}
6399

    
6400
    # beparams processing
6401
    if self.op.beparams:
6402
      i_bedict, be_new = self._GetUpdatedParams(
6403
                             instance.beparams, self.op.beparams,
6404
                             cluster.beparams[constants.PP_DEFAULT],
6405
                             constants.BES_PARAMETER_TYPES)
6406
      self.be_new = be_new # the new actual values
6407
      self.be_inst = i_bedict # the new dict (without defaults)
6408
    else:
6409
      self.be_new = self.be_inst = {}
6410

    
6411
    self.warn = []
6412

    
6413
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6414
      mem_check_list = [pnode]
6415
      if be_new[constants.BE_AUTO_BALANCE]:
6416
        # either we changed auto_balance to yes or it was from before
6417
        mem_check_list.extend(instance.secondary_nodes)
6418
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6419
                                                  instance.hypervisor)
6420
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6421
                                         instance.hypervisor)
6422
      pninfo = nodeinfo[pnode]
6423
      msg = pninfo.fail_msg
6424
      if msg:
6425
        # Assume the primary node is unreachable and go ahead
6426
        self.warn.append("Can't get info from primary node %s: %s" %
6427
                         (pnode,  msg))
6428
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
6429
        self.warn.append("Node data from primary node %s doesn't contain"
6430
                         " free memory information" % pnode)
6431
      elif instance_info.fail_msg:
6432
        self.warn.append("Can't get instance runtime information: %s" %
6433
                        instance_info.fail_msg)
6434
      else:
6435
        if instance_info.payload:
6436
          current_mem = int(instance_info.payload['memory'])
6437
        else:
6438
          # Assume instance not running
6439
          # (there is a slight race condition here, but it's not very probable,
6440
          # and we have no other way to check)
6441
          current_mem = 0
6442
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6443
                    pninfo.payload['memory_free'])
6444
        if miss_mem > 0:
6445
          raise errors.OpPrereqError("This change will prevent the instance"
6446
                                     " from starting, due to %d MB of memory"
6447
                                     " missing on its primary node" % miss_mem)
6448
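      # Worked example (numbers invented): raising BE_MEMORY to 2048 MiB for
      # an instance currently using 512 MiB on a primary node reporting
      # 1024 MiB free gives miss_mem = 2048 - 512 - 1024 = 512 > 0, so the
      # change is refused unless the force flag is set (in which case this
      # whole check is skipped).
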

    
6449
      if be_new[constants.BE_AUTO_BALANCE]:
6450
        for node, nres in nodeinfo.items():
6451
          if node not in instance.secondary_nodes:
6452
            continue
6453
          msg = nres.fail_msg
6454
          if msg:
6455
            self.warn.append("Can't get info from secondary node %s: %s" %
6456
                             (node, msg))
6457
          elif not isinstance(nres.payload.get('memory_free', None), int):
6458
            self.warn.append("Secondary node %s didn't return free"
6459
                             " memory information" % node)
6460
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
6461
            self.warn.append("Not enough memory to failover instance to"
6462
                             " secondary node %s" % node)
6463

    
6464
    # NIC processing
6465
    self.nic_pnew = {}
6466
    self.nic_pinst = {}
6467
    for nic_op, nic_dict in self.op.nics:
6468
      if nic_op == constants.DDM_REMOVE:
6469
        if not instance.nics:
6470
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6471
        continue
6472
      if nic_op != constants.DDM_ADD:
6473
        # an existing nic
6474
        if nic_op < 0 or nic_op >= len(instance.nics):
6475
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6476
                                     " are 0 to %d" %
6477
                                     (nic_op, len(instance.nics)))
6478
        old_nic_params = instance.nics[nic_op].nicparams
6479
        old_nic_ip = instance.nics[nic_op].ip
6480
      else:
6481
        old_nic_params = {}
6482
        old_nic_ip = None
6483

    
6484
      update_params_dict = dict([(key, nic_dict[key])
6485
                                 for key in constants.NICS_PARAMETERS
6486
                                 if key in nic_dict])
6487

    
6488
      if 'bridge' in nic_dict:
6489
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6490

    
6491
      new_nic_params, new_filled_nic_params = \
6492
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6493
                                 cluster.nicparams[constants.PP_DEFAULT],
6494
                                 constants.NICS_PARAMETER_TYPES)
6495
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6496
      self.nic_pinst[nic_op] = new_nic_params
6497
      self.nic_pnew[nic_op] = new_filled_nic_params
6498
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6499

    
6500
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6501
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6502
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
6503
        if msg:
6504
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
6505
          if self.force:
6506
            self.warn.append(msg)
6507
          else:
6508
            raise errors.OpPrereqError(msg)
6509
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6510
        if 'ip' in nic_dict:
6511
          nic_ip = nic_dict['ip']
6512
        else:
6513
          nic_ip = old_nic_ip
6514
        if nic_ip is None:
6515
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6516
                                     ' on a routed nic')
6517
      if 'mac' in nic_dict:
6518
        nic_mac = nic_dict['mac']
6519
        if nic_mac is None:
6520
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6521
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6522
          # otherwise generate the mac
6523
          nic_dict['mac'] = self.cfg.GenerateMAC()
6524
        else:
6525
          # or validate/reserve the current one
6526
          if self.cfg.IsMacInUse(nic_mac):
6527
            raise errors.OpPrereqError("MAC address %s already in use"
6528
                                       " in cluster" % nic_mac)
6529

    
6530
    # DISK processing
6531
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6532
      raise errors.OpPrereqError("Disk operations not supported for"
6533
                                 " diskless instances")
6534
    for disk_op, disk_dict in self.op.disks:
6535
      if disk_op == constants.DDM_REMOVE:
6536
        if len(instance.disks) == 1:
6537
          raise errors.OpPrereqError("Cannot remove the last disk of"
6538
                                     " an instance")
6539
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6540
        ins_l = ins_l[pnode]
6541
        msg = ins_l.fail_msg
6542
        if msg:
6543
          raise errors.OpPrereqError("Can't contact node %s: %s" %
6544
                                     (pnode, msg))
6545
        if instance.name in ins_l.payload:
6546
          raise errors.OpPrereqError("Instance is running, can't remove"
6547
                                     " disks.")
6548

    
6549
      if (disk_op == constants.DDM_ADD and
6550
          len(instance.disks) >= constants.MAX_DISKS):
6551
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6552
                                   " add more" % constants.MAX_DISKS)
6553
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6554
        # an existing disk
6555
        if disk_op < 0 or disk_op >= len(instance.disks):
6556
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6557
                                     " are 0 to %d" %
6558
                                     (disk_op, len(instance.disks)))
6559

    
6560
    return
6561

    
6562
  def Exec(self, feedback_fn):
6563
    """Modifies an instance.
6564

6565
    All parameters take effect only at the next restart of the instance.
6566

6567
    """
6568
    # Process here the warnings from CheckPrereq, as we don't have a
6569
    # feedback_fn there.
6570
    for warn in self.warn:
6571
      feedback_fn("WARNING: %s" % warn)
6572

    
6573
    result = []
6574
    instance = self.instance
6575
    cluster = self.cluster
6576
    # disk changes
6577
    for disk_op, disk_dict in self.op.disks:
6578
      if disk_op == constants.DDM_REMOVE:
6579
        # remove the last disk
6580
        device = instance.disks.pop()
6581
        device_idx = len(instance.disks)
6582
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6583
          self.cfg.SetDiskID(disk, node)
6584
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
6585
          if msg:
6586
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6587
                            " continuing anyway", device_idx, node, msg)
6588
        result.append(("disk/%d" % device_idx, "remove"))
6589
      elif disk_op == constants.DDM_ADD:
6590
        # add a new disk
6591
        if instance.disk_template == constants.DT_FILE:
6592
          file_driver, file_path = instance.disks[0].logical_id
6593
          file_path = os.path.dirname(file_path)
6594
        else:
6595
          file_driver = file_path = None
6596
        disk_idx_base = len(instance.disks)
6597
        new_disk = _GenerateDiskTemplate(self,
6598
                                         instance.disk_template,
6599
                                         instance.name, instance.primary_node,
6600
                                         instance.secondary_nodes,
6601
                                         [disk_dict],
6602
                                         file_path,
6603
                                         file_driver,
6604
                                         disk_idx_base)[0]
6605
        instance.disks.append(new_disk)
6606
        info = _GetInstanceInfoText(instance)
6607

    
6608
        logging.info("Creating volume %s for instance %s",
6609
                     new_disk.iv_name, instance.name)
6610
        # Note: this needs to be kept in sync with _CreateDisks
6611
        #HARDCODE
6612
        for node in instance.all_nodes:
6613
          f_create = node == instance.primary_node
6614
          try:
6615
            _CreateBlockDev(self, node, instance, new_disk,
6616
                            f_create, info, f_create)
6617
          except errors.OpExecError, err:
6618
            self.LogWarning("Failed to create volume %s (%s) on"
6619
                            " node %s: %s",
6620
                            new_disk.iv_name, new_disk, node, err)
6621
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6622
                       (new_disk.size, new_disk.mode)))
6623
      else:
6624
        # change a given disk
6625
        instance.disks[disk_op].mode = disk_dict['mode']
6626
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6627
    # NIC changes
6628
    for nic_op, nic_dict in self.op.nics:
6629
      if nic_op == constants.DDM_REMOVE:
6630
        # remove the last nic
6631
        del instance.nics[-1]
6632
        result.append(("nic.%d" % len(instance.nics), "remove"))
6633
      elif nic_op == constants.DDM_ADD:
6634
        # mac and bridge should be set, by now
6635
        mac = nic_dict['mac']
6636
        ip = nic_dict.get('ip', None)
6637
        nicparams = self.nic_pinst[constants.DDM_ADD]
6638
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6639
        instance.nics.append(new_nic)
6640
        result.append(("nic.%d" % (len(instance.nics) - 1),
6641
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6642
                       (new_nic.mac, new_nic.ip,
6643
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6644
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6645
                       )))
6646
      else:
6647
        for key in 'mac', 'ip':
6648
          if key in nic_dict:
6649
            setattr(instance.nics[nic_op], key, nic_dict[key])
6650
        if nic_op in self.nic_pnew:
6651
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6652
        for key, val in nic_dict.iteritems():
6653
          result.append(("nic.%s/%d" % (key, nic_op), val))
6654

    
6655
    # hvparams changes
6656
    if self.op.hvparams:
6657
      instance.hvparams = self.hv_inst
6658
      for key, val in self.op.hvparams.iteritems():
6659
        result.append(("hv/%s" % key, val))
6660

    
6661
    # beparams changes
6662
    if self.op.beparams:
6663
      instance.beparams = self.be_inst
6664
      for key, val in self.op.beparams.iteritems():
6665
        result.append(("be/%s" % key, val))
6666

    
6667
    self.cfg.Update(instance)
6668

    
6669
    return result
6670

    
6671

    
6672
class LUQueryExports(NoHooksLU):
6673
  """Query the exports list
6674

6675
  """
6676
  _OP_REQP = ['nodes']
6677
  REQ_BGL = False
6678

    
6679
  def ExpandNames(self):
6680
    self.needed_locks = {}
6681
    self.share_locks[locking.LEVEL_NODE] = 1
6682
    if not self.op.nodes:
6683
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6684
    else:
6685
      self.needed_locks[locking.LEVEL_NODE] = \
6686
        _GetWantedNodes(self, self.op.nodes)
6687

    
6688
  def CheckPrereq(self):
6689
    """Check prerequisites.
6690

6691
    """
6692
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6693

    
6694
  def Exec(self, feedback_fn):
6695
    """Compute the list of all the exported system images.
6696

6697
    @rtype: dict
6698
    @return: a dictionary with the structure node->(export-list)
6699
        where export-list is a list of the instances exported on
6700
        that node.
6701

6702
    """
6703
    rpcresult = self.rpc.call_export_list(self.nodes)
6704
    result = {}
6705
    for node in rpcresult:
6706
      if rpcresult[node].fail_msg:
6707
        result[node] = False
6708
      else:
6709
        result[node] = rpcresult[node].payload
6710

    
6711
    return result
6712

    
6713

    
6714
class LUExportInstance(LogicalUnit):
6715
  """Export an instance to an image in the cluster.
6716

6717
  """
6718
  HPATH = "instance-export"
6719
  HTYPE = constants.HTYPE_INSTANCE
6720
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6721
  REQ_BGL = False
6722

    
6723
  def ExpandNames(self):
6724
    self._ExpandAndLockInstance()
6725
    # FIXME: lock only instance primary and destination node
6726
    #
6727
    # Sad but true, for now we have to lock all nodes, as we don't know where
6728
    # the previous export might be, and in this LU we search for it and
6729
    # remove it from its current node. In the future we could fix this by:
6730
    #  - making a tasklet to search (share-lock all), then create the new one,
6731
    #    then one to remove, after
6732
    #  - removing the removal operation altogether
6733
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6734

    
6735
  def DeclareLocks(self, level):
6736
    """Last minute lock declaration."""
6737
    # All nodes are locked anyway, so nothing to do here.
6738

    
6739
  def BuildHooksEnv(self):
6740
    """Build hooks env.
6741

6742
    This will run on the master, primary node and target node.
6743

6744
    """
6745
    env = {
6746
      "EXPORT_NODE": self.op.target_node,
6747
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6748
      }
6749
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6750
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6751
          self.op.target_node]
6752
    return env, nl, nl
6753

    
6754
  def CheckPrereq(self):
6755
    """Check prerequisites.
6756

6757
    This checks that the instance and node names are valid.
6758

6759
    """
6760
    instance_name = self.op.instance_name
6761
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6762
    assert self.instance is not None, \
6763
          "Cannot retrieve locked instance %s" % self.op.instance_name
6764
    _CheckNodeOnline(self, self.instance.primary_node)
6765

    
6766
    self.dst_node = self.cfg.GetNodeInfo(
6767
      self.cfg.ExpandNodeName(self.op.target_node))
6768

    
6769
    if self.dst_node is None:
6770
      # This is a wrong node name, not a non-locked node
6771
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6772
    _CheckNodeOnline(self, self.dst_node.name)
6773
    _CheckNodeNotDrained(self, self.dst_node.name)
6774

    
6775
    # instance disk type verification
6776
    for disk in self.instance.disks:
6777
      if disk.dev_type == constants.LD_FILE:
6778
        raise errors.OpPrereqError("Export not supported for instances with"
6779
                                   " file-based disks")
6780

    
6781
  def Exec(self, feedback_fn):
6782
    """Export an instance to an image in the cluster.
6783

6784
    """
6785
    instance = self.instance
6786
    dst_node = self.dst_node
6787
    src_node = instance.primary_node
6788
    if self.op.shutdown:
6789
      # shutdown the instance, but not the disks
6790
      result = self.rpc.call_instance_shutdown(src_node, instance)
6791
      result.Raise("Could not shutdown instance %s on"
6792
                   " node %s" % (instance.name, src_node))
6793

    
6794
    vgname = self.cfg.GetVGName()
6795

    
6796
    snap_disks = []
6797

    
6798
    # set the disks ID correctly since call_instance_start needs the
6799
    # correct drbd minor to create the symlinks
6800
    for disk in instance.disks:
6801
      self.cfg.SetDiskID(disk, src_node)
6802

    
6803
    try:
6804
      for idx, disk in enumerate(instance.disks):
6805
        # result.payload will be a snapshot of an lvm leaf of the one we passed
6806
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
6807
        msg = result.fail_msg
6808
        if msg:
6809
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
6810
                          idx, src_node, msg)
6811
          snap_disks.append(False)
6812
        else:
6813
          disk_id = (vgname, result.payload)
6814
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6815
                                 logical_id=disk_id, physical_id=disk_id,
6816
                                 iv_name=disk.iv_name)
6817
          snap_disks.append(new_dev)
6818

    
6819
    finally:
6820
      if self.op.shutdown and instance.admin_up:
6821
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6822
        msg = result.fail_msg
6823
        if msg:
6824
          _ShutdownInstanceDisks(self, instance)
6825
          raise errors.OpExecError("Could not start instance: %s" % msg)
6826

    
6827
    # TODO: check for size
6828

    
6829
    cluster_name = self.cfg.GetClusterName()
6830
    for idx, dev in enumerate(snap_disks):
6831
      if dev:
6832
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6833
                                               instance, cluster_name, idx)
6834
        msg = result.fail_msg
6835
        if msg:
6836
          self.LogWarning("Could not export disk/%s from node %s to"
6837
                          " node %s: %s", idx, src_node, dst_node.name, msg)
6838
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
6839
        if msg:
6840
          self.LogWarning("Could not remove snapshot for disk/%d from node"
6841
                          " %s: %s", idx, src_node, msg)
6842

    
6843
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6844
    msg = result.fail_msg
6845
    if msg:
6846
      self.LogWarning("Could not finalize export for instance %s"
6847
                      " on node %s: %s", instance.name, dst_node.name, msg)
6848

    
6849
    nodelist = self.cfg.GetNodeList()
6850
    nodelist.remove(dst_node.name)
6851

    
6852
    # on one-node clusters nodelist will be empty after the removal
6853
    # if we proceed the backup would be removed because OpQueryExports
6854
    # substitutes an empty list with the full cluster node list.
6855
    iname = instance.name
6856
    if nodelist:
6857
      exportlist = self.rpc.call_export_list(nodelist)
6858
      for node in exportlist:
6859
        if exportlist[node].fail_msg:
6860
          continue
6861
        if iname in exportlist[node].payload:
6862
          msg = self.rpc.call_export_remove(node, iname).fail_msg
6863
          if msg:
6864
            self.LogWarning("Could not remove older export for instance %s"
6865
                            " on node %s: %s", iname, node, msg)
6866

    
6867

    
6868
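# Illustrative sketch (not part of LUExportInstance): the per-disk pipeline
# used by the export above, reduced to three caller-supplied callables.  The
# helper name and its callable-based interface are invented for the example;
# the real code talks to the nodes through self.rpc and also finalizes the
# export and restarts the instance, which is left out here.
def _ExampleExportPipeline(disks, snapshot_fn, export_fn, remove_fn):
  """Snapshot, copy and clean up each disk, collecting warnings.

  @param disks: list of opaque disk objects
  @param snapshot_fn: callable(disk) -> snapshot or None on failure
  @param export_fn: callable(snapshot) -> error message or None
  @param remove_fn: callable(snapshot) -> error message or None

  """
  warnings = []
  for idx, disk in enumerate(disks):
    snap = snapshot_fn(disk)
    if snap is None:
      # mirror the LU: a failed snapshot is only a warning, the export of
      # the remaining disks continues
      warnings.append("could not snapshot disk/%d" % idx)
      continue
    msg = export_fn(snap)
    if msg:
      warnings.append("could not export disk/%d: %s" % (idx, msg))
    # the snapshot is removed even if the copy failed, as in the LU
    msg = remove_fn(snap)
    if msg:
      warnings.append("could not remove snapshot for disk/%d: %s" % (idx, msg))
  return warnings

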
class LURemoveExport(NoHooksLU):
6869
  """Remove exports related to the named instance.
6870

6871
  """
6872
  _OP_REQP = ["instance_name"]
6873
  REQ_BGL = False
6874

    
6875
  def ExpandNames(self):
6876
    self.needed_locks = {}
6877
    # We need all nodes to be locked in order for RemoveExport to work, but we
6878
    # don't need to lock the instance itself, as nothing will happen to it (and
6879
    # we can remove exports also for a removed instance)
6880
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6881

    
6882
  def CheckPrereq(self):
6883
    """Check prerequisites.
6884
    """
6885
    pass
6886

    
6887
  def Exec(self, feedback_fn):
6888
    """Remove any export.
6889

6890
    """
6891
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6892
    # If the instance was not found we'll try with the name that was passed in.
6893
    # This will only work if it was an FQDN, though.
6894
    fqdn_warn = False
6895
    if not instance_name:
6896
      fqdn_warn = True
6897
      instance_name = self.op.instance_name
6898

    
6899
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6900
    exportlist = self.rpc.call_export_list(locked_nodes)
6901
    found = False
6902
    for node in exportlist:
6903
      msg = exportlist[node].fail_msg
6904
      if msg:
6905
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
6906
        continue
6907
      if instance_name in exportlist[node].payload:
6908
        found = True
6909
        result = self.rpc.call_export_remove(node, instance_name)
6910
        msg = result.fail_msg
6911
        if msg:
6912
          logging.error("Could not remove export for instance %s"
6913
                        " on node %s: %s", instance_name, node, msg)
6914

    
6915
    if fqdn_warn and not found:
6916
      feedback_fn("Export not found. If trying to remove an export belonging"
6917
                  " to a deleted instance please use its Fully Qualified"
6918
                  " Domain Name.")
6919

    
6920

    
6921
class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))
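
  # For illustration: with kind=constants.TAG_NODE and a hypothetical name of
  # "node1.example.com", ExpandNames locks that node and CheckPrereq sets
  # self.target to the corresponding node object; constants.TAG_CLUSTER needs
  # no per-object lock and targets the cluster configuration itself.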


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
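
  # For example (hypothetical names): a pattern of "^web" on a cluster where
  # the instance "web1.example.com" carries the tag "webfarm" would yield
  # [("/instances/web1.example.com", "webfarm")]; cluster and node tags are
  # matched the same way under "/cluster" and "/nodes/<name>".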


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _ALLO_KEYS or _RELO_KEYS class
      attributes, depending on the mode, are required)
    - four buffer attributes (in_data, in_text, out_data, out_text) that
      represent the input to the external script in data structure and
      text format, and the output from it, again in both formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

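  # Construction sketch (hypothetical values), mirroring LUTestAllocator.Exec
  # below: in allocation mode all _ALLO_KEYS must be passed as keyword
  # arguments; in relocation mode only relocate_from is needed.
  #
  #   ial = IAllocator(cfg, rpc,
  #                    mode=constants.IALLOCATOR_MODE_ALLOC,
  #                    name="new-instance.example.com",
  #                    mem_size=128, disks=[{"size": 1024, "mode": "w"}],
  #                    disk_template=constants.DT_DRBD8,
  #                    os="debian-etch", tags=[], nics=[], vcpus=1,
  #                    hypervisor=None)
  #   ial.Run("my-allocator")   # runs the named script on the master node
  #   if ial.success:
  #     target_nodes = ial.nodes
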
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

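  # The resulting self.in_data is a plain dict; roughly (values elided):
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": ..., "cluster_tags": [...],
  #    "enabled_hypervisors": [...],
  #    "nodes": {"<node name>": {"tags": [...], "primary_ip": ...,
  #                              "total_memory": ..., "free_memory": ...,
  #                              "total_disk": ..., "free_disk": ..., ...}},
  #    "instances": {"<instance name>": {"memory": ..., "vcpus": ...,
  #                                      "disks": [...], "nics": [...], ...}}}
  #
  # _AddNewInstance/_AddRelocateInstance add the "request" key on top of this.
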
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

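  # For a two-node (network-mirrored) allocation the request ends up looking
  # like this (hypothetical values):
  #
  #   {"type": "allocate", "name": "new-instance.example.com",
  #    "disk_template": <a DTS_NET_MIRROR template>, "tags": [],
  #    "os": "debian-etch", "vcpus": 1, "memory": 128,
  #    "disks": [{"size": 1024, "mode": "w"}],
  #    "disk_space_total": <computed by _ComputeDiskSize>,
  #    "nics": [...], "required_nodes": 2}
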
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict
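
  # A well-formed reply from the allocator script deserializes to something
  # like this (hypothetical values):
  #
  #   {"success": True,
  #    "info": "allocation successful",
  #    "nodes": ["node1.example.com", "node2.example.com"]}
  #
  # i.e. the three keys checked above, with "nodes" being a list of node
  # names.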


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result