lib/cmdlib.py @ 7ffc5a86


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
46
  """Logical Unit base class.
47

48
  Subclasses must follow these rules:
49
    - implement ExpandNames
50
    - implement CheckPrereq (except when tasklets are used)
51
    - implement Exec (except when tasklets are used)
52
    - implement BuildHooksEnv
53
    - redefine HPATH and HTYPE
54
    - optionally redefine their run requirements:
55
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
56

57
  Note that all commands require root permissions.
58

59
  @ivar dry_run_result: the value (if any) that will be returned to the caller
60
      in dry-run mode (signalled by opcode dry_run parameter)
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92
    self.LogStep = processor.LogStep
93
    # support for dry-run
94
    self.dry_run_result = None
95

    
96
    # Tasklets
97
    self.tasklets = []
98

    
99
    for attr_name in self._OP_REQP:
100
      attr_val = getattr(op, attr_name, None)
101
      if attr_val is None:
102
        raise errors.OpPrereqError("Required parameter '%s' missing" %
103
                                   attr_name)
104

    
105
    self.CheckArguments()
106

    
107
  def __GetSSH(self):
108
    """Returns the SshRunner object
109

110
    """
111
    if not self.__ssh:
112
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
113
    return self.__ssh
114

    
115
  ssh = property(fget=__GetSSH)
116

    
117
  def CheckArguments(self):
118
    """Check syntactic validity for the opcode arguments.
119

120
    This method is for doing a simple syntactic check and ensuring the
121
    validity of opcode parameters, without any cluster-related
122
    checks. While the same can be accomplished in ExpandNames and/or
123
    CheckPrereq, doing these checks separately is better because:
124

125
      - ExpandNames is left as a purely lock-related function
126
      - CheckPrereq is run after we have acquired locks (and possibly
127
        waited for them)
128

129
    The function is allowed to change the self.op attribute so that
130
    later methods no longer need to worry about missing parameters.
131

132
    """
133
    pass
134
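
  # A minimal sketch (hypothetical "force" opcode field, not part of the
  # original file): a typical CheckArguments override does purely syntactic
  # validation and normalizes optional parameters, for example:
  #
  #   def CheckArguments(self):
  #     if not hasattr(self.op, "force"):
  #       self.op.force = False
  #     if not isinstance(self.op.force, bool):
  #       raise errors.OpPrereqError("Invalid 'force' parameter")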

    
135
  def ExpandNames(self):
136
    """Expand names for this LU.
137

138
    This method is called before starting to execute the opcode, and it should
139
    update all the parameters of the opcode to their canonical form (e.g. a
140
    short node name must be fully expanded after this method has successfully
141
    completed). This way locking, hooks, logging, etc. can work correctly.
142

143
    LUs which implement this method must also populate the self.needed_locks
144
    member, as a dict with lock levels as keys, and a list of needed lock names
145
    as values. Rules:
146

147
      - use an empty dict if you don't need any lock
148
      - if you don't need any lock at a particular level omit that level
149
      - don't put anything for the BGL level
150
      - if you want all locks at a level use locking.ALL_SET as a value
151

152
    If you need to share locks (rather than acquire them exclusively) at one
153
    level you can modify self.share_locks, setting a true value (usually 1) for
154
    that level. By default locks are not shared.
155

156
    This function can also define a list of tasklets, which then will be
157
    executed in order instead of the usual LU-level CheckPrereq and Exec
158
    functions, if those are not defined by the LU.
159

160
    Examples::
161

162
      # Acquire all nodes and one instance
163
      self.needed_locks = {
164
        locking.LEVEL_NODE: locking.ALL_SET,
165
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
166
      }
167
      # Acquire just two nodes
168
      self.needed_locks = {
169
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
170
      }
171
      # Acquire no locks
172
      self.needed_locks = {} # No, you can't leave it to the default value None
173

174
    """
175
    # The implementation of this method is mandatory only if the new LU is
176
    # concurrent, so that old LUs don't need to be changed all at the same
177
    # time.
178
    if self.REQ_BGL:
179
      self.needed_locks = {} # Exclusive LUs don't need locks.
180
    else:
181
      raise NotImplementedError
182
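
  # A minimal sketch (hypothetical LU code, not part of the original file): a
  # concurrent LU that wants all node locks in shared mode and delegates its
  # work to tasklets could declare
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
  #     self.share_locks[locking.LEVEL_NODE] = 1
  #     self.tasklets = [SomeTasklet(self)]  # SomeTasklet is hypothetical
  #
  # see also the Tasklet base class further down in this module.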

    
183
  def DeclareLocks(self, level):
184
    """Declare LU locking needs for a level
185

186
    While most LUs can just declare their locking needs at ExpandNames time,
187
    sometimes there's the need to calculate some locks after having acquired
188
    the ones before. This function is called just before acquiring locks at a
189
    particular level, but after acquiring the ones at lower levels, and permits
190
    such calculations. It can be used to modify self.needed_locks, and by
191
    default it does nothing.
192

193
    This function is only called if you have something already set in
194
    self.needed_locks for the level.
195

196
    @param level: Locking level which is going to be locked
197
    @type level: member of ganeti.locking.LEVELS
198

199
    """
200

    
201
  def CheckPrereq(self):
202
    """Check prerequisites for this LU.
203

204
    This method should check that the prerequisites for the execution
205
    of this LU are fulfilled. It can do internode communication, but
206
    it should be idempotent - no cluster or system changes are
207
    allowed.
208

209
    The method should raise errors.OpPrereqError in case something is
210
    not fulfilled. Its return value is ignored.
211

212
    This method should also update all the parameters of the opcode to
213
    their canonical form if it hasn't been done by ExpandNames before.
214

215
    """
216
    if self.tasklets:
217
      for tl in self.tasklets:
218
        tl.CheckPrereq()
219
    else:
220
      raise NotImplementedError
221

    
222
  def Exec(self, feedback_fn):
223
    """Execute the LU.
224

225
    This method should implement the actual work. It should raise
226
    errors.OpExecError for failures that are somewhat dealt with in
227
    code, or expected.
228

229
    """
230
    if self.tasklets:
231
      for tl in self.tasklets:
232
        tl.Exec(feedback_fn)
233
    else:
234
      raise NotImplementedError
235

    
236
  def BuildHooksEnv(self):
237
    """Build hooks environment for this LU.
238

239
    This method should return a three-element tuple consisting of: a dict
240
    containing the environment that will be used for running the
241
    specific hook for this LU, a list of node names on which the hook
242
    should run before the execution, and a list of node names on which
243
    the hook should run after the execution.
244

245
    The keys of the dict must not be prefixed with 'GANETI_', as this will
246
    be handled in the hooks runner. Also note additional keys will be
247
    added by the hooks runner. If the LU doesn't define any
248
    environment, an empty dict (and not None) should be returned.
249

250
    If no nodes are to be returned, an empty list (and not None) should be used.
251

252
    Note that if the HPATH for a LU class is None, this function will
253
    not be called.
254

255
    """
256
    raise NotImplementedError
257
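
  # A minimal sketch (hypothetical values): a cluster-level LU typically
  # returns the environment dict plus the pre- and post-phase node lists:
  #
  #   def BuildHooksEnv(self):
  #     env = {"OP_TARGET": self.cfg.GetClusterName()}
  #     mn = self.cfg.GetMasterNode()
  #     return env, [mn], [mn]
  #
  # (compare LUVerifyCluster.BuildHooksEnv and LURenameCluster.BuildHooksEnv
  # below for real uses.)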

    
258
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
259
    """Notify the LU about the results of its hooks.
260

261
    This method is called every time a hooks phase is executed, and notifies
262
    the Logical Unit about the hooks' result. The LU can then use it to alter
263
    its result based on the hooks.  By default the method does nothing and the
264
    previous result is passed back unchanged, but any LU can redefine it if it
265
    wants to use the local cluster hook-scripts somehow.
266

267
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
268
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
269
    @param hook_results: the results of the multi-node hooks rpc call
270
    @param feedback_fn: function used to send feedback back to the caller
271
    @param lu_result: the previous Exec result this LU had, or None
272
        in the PRE phase
273
    @return: the new Exec result, based on the previous result
274
        and hook results
275

276
    """
277
    return lu_result
278

    
279
  def _ExpandAndLockInstance(self):
280
    """Helper function to expand and lock an instance.
281

282
    Many LUs that work on an instance take its name in self.op.instance_name
283
    and need to expand it and then declare the expanded name for locking. This
284
    function does it, and then updates self.op.instance_name to the expanded
285
    name. It also initializes needed_locks as a dict, if this hasn't been done
286
    before.
287

288
    """
289
    if self.needed_locks is None:
290
      self.needed_locks = {}
291
    else:
292
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
293
        "_ExpandAndLockInstance called with instance-level locks set"
294
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
295
    if expanded_name is None:
296
      raise errors.OpPrereqError("Instance '%s' not known" %
297
                                  self.op.instance_name)
298
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
299
    self.op.instance_name = expanded_name
300

    
301
  def _LockInstancesNodes(self, primary_only=False):
302
    """Helper function to declare instances' nodes for locking.
303

304
    This function should be called after locking one or more instances to lock
305
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
306
    with all primary or secondary nodes for instances already locked and
307
    present in self.needed_locks[locking.LEVEL_INSTANCE].
308

309
    It should be called from DeclareLocks, and for safety only works if
310
    self.recalculate_locks[locking.LEVEL_NODE] is set.
311

312
    In the future it may grow parameters to lock only some instances' nodes, or
313
    to lock only primary or secondary nodes, if needed.
314

315
    It should be called from DeclareLocks in a way similar to::
316

317
      if level == locking.LEVEL_NODE:
318
        self._LockInstancesNodes()
319

320
    @type primary_only: boolean
321
    @param primary_only: only lock primary nodes of locked instances
322

323
    """
324
    assert locking.LEVEL_NODE in self.recalculate_locks, \
325
      "_LockInstancesNodes helper function called with no nodes to recalculate"
326

    
327
    # TODO: check if we've really been called with the instance locks held
328

    
329
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
330
    # future we might want to have different behaviors depending on the value
331
    # of self.recalculate_locks[locking.LEVEL_NODE]
332
    wanted_nodes = []
333
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
334
      instance = self.context.cfg.GetInstanceInfo(instance_name)
335
      wanted_nodes.append(instance.primary_node)
336
      if not primary_only:
337
        wanted_nodes.extend(instance.secondary_nodes)
338

    
339
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
340
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
341
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
342
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
343

    
344
    del self.recalculate_locks[locking.LEVEL_NODE]
345
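
# A minimal sketch (hypothetical LU, not part of the original module) of how
# the locking helpers above are typically combined: the instance lock is
# declared in ExpandNames via _ExpandAndLockInstance, and the node locks are
# recalculated in DeclareLocks via _LockInstancesNodes.
class _LUExampleInstanceOp(LogicalUnit):
  """Hypothetical instance-level LU, shown only to illustrate the helpers.

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    feedback_fn("Instance %s uses node(s) %s" %
                (self.instance.name,
                 ", ".join([self.instance.primary_node] +
                           list(self.instance.secondary_nodes))))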

    
346

    
347
class NoHooksLU(LogicalUnit):
348
  """Simple LU which runs no hooks.
349

350
  This LU is intended as a parent for other LogicalUnits which will
351
  run no hooks, in order to reduce duplicate code.
352

353
  """
354
  HPATH = None
355
  HTYPE = None
356

    
357

    
358
class Tasklet:
359
  """Tasklet base class.
360

361
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
362
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
363
  tasklets know nothing about locks.
364

365
  Subclasses must follow these rules:
366
    - Implement CheckPrereq
367
    - Implement Exec
368

369
  """
370
  def CheckPrereq(self):
371
    """Check prerequisites for this tasklets.
372

373
    This method should check whether the prerequisites for the execution of
374
    this tasklet are fulfilled. It can do internode communication, but it
375
    should be idempotent - no cluster or system changes are allowed.
376

377
    The method should raise errors.OpPrereqError in case something is not
378
    fulfilled. Its return value is ignored.
379

380
    This method should also update all parameters to their canonical form if it
381
    hasn't been done before.
382

383
    """
384
    raise NotImplementedError
385

    
386
  def Exec(self, feedback_fn):
387
    """Execute the tasklet.
388

389
    This method should implement the actual work. It should raise
390
    errors.OpExecError for failures that are somewhat dealt with in code, or
391
    expected.
392

393
    """
394
    raise NotImplementedError
395
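
# A minimal sketch (hypothetical classes, not part of the original module): a
# tasklet plus an LU that delegates CheckPrereq and Exec to it by filling
# self.tasklets in ExpandNames, as described in LogicalUnit above.
class _ExampleTasklet(Tasklet):
  """Hypothetical tasklet, shown only to illustrate the interface.

  """
  def __init__(self, lu, instance_name):
    self.lu = lu
    self.instance_name = instance_name

  def CheckPrereq(self):
    if self.lu.cfg.ExpandInstanceName(self.instance_name) is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Would operate on instance %s" % self.instance_name)


class _LUExampleTaskletOp(NoHooksLU):
  """Hypothetical LU built entirely from tasklets.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # the inherited CheckPrereq/Exec run each tasklet in order
    self.tasklets = [_ExampleTasklet(self, self.op.instance_name)]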

    
396

    
397
def _GetWantedNodes(lu, nodes):
398
  """Returns list of checked and expanded node names.
399

400
  @type lu: L{LogicalUnit}
401
  @param lu: the logical unit on whose behalf we execute
402
  @type nodes: list
403
  @param nodes: list of node names or None for all nodes
404
  @rtype: list
405
  @return: the list of nodes, sorted
406
  @raise errors.OpPrereqError: if the nodes parameter is of the wrong type
407

408
  """
409
  if not isinstance(nodes, list):
410
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
411

    
412
  if not nodes:
413
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
414
      " non-empty list of nodes whose name is to be expanded.")
415

    
416
  wanted = []
417
  for name in nodes:
418
    node = lu.cfg.ExpandNodeName(name)
419
    if node is None:
420
      raise errors.OpPrereqError("No such node name '%s'" % name)
421
    wanted.append(node)
422

    
423
  return utils.NiceSort(wanted)
424

    
425

    
426
def _GetWantedInstances(lu, instances):
427
  """Returns list of checked and expanded instance names.
428

429
  @type lu: L{LogicalUnit}
430
  @param lu: the logical unit on whose behalf we execute
431
  @type instances: list
432
  @param instances: list of instance names or None for all instances
433
  @rtype: list
434
  @return: the list of instances, sorted
435
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
436
  @raise errors.OpPrereqError: if any of the passed instances is not found
437

438
  """
439
  if not isinstance(instances, list):
440
    raise errors.OpPrereqError("Invalid argument type 'instances'")
441

    
442
  if instances:
443
    wanted = []
444

    
445
    for name in instances:
446
      instance = lu.cfg.ExpandInstanceName(name)
447
      if instance is None:
448
        raise errors.OpPrereqError("No such instance name '%s'" % name)
449
      wanted.append(instance)
450

    
451
  else:
452
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
453
  return wanted
454
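
# Example usage (hypothetical LU code, for illustration only): inside an LU's
# CheckPrereq one would typically write
#
#   self.wanted_nodes = _GetWantedNodes(self, self.op.nodes)
#   self.wanted_instances = _GetWantedInstances(self, self.op.instances)
#
# note that _GetWantedNodes requires a non-empty list, while
# _GetWantedInstances falls back to all configured instances when given an
# empty list.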

    
455

    
456
def _CheckOutputFields(static, dynamic, selected):
457
  """Checks whether all selected fields are valid.
458

459
  @type static: L{utils.FieldSet}
460
  @param static: static fields set
461
  @type dynamic: L{utils.FieldSet}
462
  @param dynamic: dynamic fields set
463

464
  """
465
  f = utils.FieldSet()
466
  f.Extend(static)
467
  f.Extend(dynamic)
468

    
469
  delta = f.NonMatching(selected)
470
  if delta:
471
    raise errors.OpPrereqError("Unknown output fields selected: %s"
472
                               % ",".join(delta))
473
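
# Example usage (hypothetical field names, for illustration only): query LUs
# validate the opcode's requested output fields like this:
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dtotal", "dfree"),
#                      selected=self.op.output_fields)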

    
474

    
475
def _CheckBooleanOpField(op, name):
476
  """Validates boolean opcode parameters.
477

478
  This will ensure that an opcode parameter is either a boolean value,
479
  or None (but that it always exists).
480

481
  """
482
  val = getattr(op, name, None)
483
  if not (val is None or isinstance(val, bool)):
484
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
485
                               (name, str(val)))
486
  setattr(op, name, val)
487

    
488

    
489
def _CheckNodeOnline(lu, node):
490
  """Ensure that a given node is online.
491

492
  @param lu: the LU on behalf of which we make the check
493
  @param node: the node to check
494
  @raise errors.OpPrereqError: if the node is offline
495

496
  """
497
  if lu.cfg.GetNodeInfo(node).offline:
498
    raise errors.OpPrereqError("Can't use offline node %s" % node)
499

    
500

    
501
def _CheckNodeNotDrained(lu, node):
502
  """Ensure that a given node is not drained.
503

504
  @param lu: the LU on behalf of which we make the check
505
  @param node: the node to check
506
  @raise errors.OpPrereqError: if the node is drained
507

508
  """
509
  if lu.cfg.GetNodeInfo(node).drained:
510
    raise errors.OpPrereqError("Can't use drained node %s" % node)
511

    
512

    
513
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
514
                          memory, vcpus, nics, disk_template, disks,
515
                          bep, hvp, hypervisor_name):
516
  """Builds instance related env variables for hooks
517

518
  This builds the hook environment from individual variables.
519

520
  @type name: string
521
  @param name: the name of the instance
522
  @type primary_node: string
523
  @param primary_node: the name of the instance's primary node
524
  @type secondary_nodes: list
525
  @param secondary_nodes: list of secondary nodes as strings
526
  @type os_type: string
527
  @param os_type: the name of the instance's OS
528
  @type status: boolean
529
  @param status: the should_run status of the instance
530
  @type memory: string
531
  @param memory: the memory size of the instance
532
  @type vcpus: string
533
  @param vcpus: the count of VCPUs the instance has
534
  @type nics: list
535
  @param nics: list of tuples (ip, mac, mode, link) representing
536
      the NICs the instance has
537
  @type disk_template: string
538
  @param disk_template: the disk template of the instance
539
  @type disks: list
540
  @param disks: the list of (size, mode) pairs
541
  @type bep: dict
542
  @param bep: the backend parameters for the instance
543
  @type hvp: dict
544
  @param hvp: the hypervisor parameters for the instance
545
  @type hypervisor_name: string
546
  @param hypervisor_name: the hypervisor for the instance
547
  @rtype: dict
548
  @return: the hook environment for this instance
549

550
  """
551
  if status:
552
    str_status = "up"
553
  else:
554
    str_status = "down"
555
  env = {
556
    "OP_TARGET": name,
557
    "INSTANCE_NAME": name,
558
    "INSTANCE_PRIMARY": primary_node,
559
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
560
    "INSTANCE_OS_TYPE": os_type,
561
    "INSTANCE_STATUS": str_status,
562
    "INSTANCE_MEMORY": memory,
563
    "INSTANCE_VCPUS": vcpus,
564
    "INSTANCE_DISK_TEMPLATE": disk_template,
565
    "INSTANCE_HYPERVISOR": hypervisor_name,
566
  }
567

    
568
  if nics:
569
    nic_count = len(nics)
570
    for idx, (ip, mac, mode, link) in enumerate(nics):
571
      if ip is None:
572
        ip = ""
573
      env["INSTANCE_NIC%d_IP" % idx] = ip
574
      env["INSTANCE_NIC%d_MAC" % idx] = mac
575
      env["INSTANCE_NIC%d_MODE" % idx] = mode
576
      env["INSTANCE_NIC%d_LINK" % idx] = link
577
      if mode == constants.NIC_MODE_BRIDGED:
578
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
579
  else:
580
    nic_count = 0
581

    
582
  env["INSTANCE_NIC_COUNT"] = nic_count
583

    
584
  if disks:
585
    disk_count = len(disks)
586
    for idx, (size, mode) in enumerate(disks):
587
      env["INSTANCE_DISK%d_SIZE" % idx] = size
588
      env["INSTANCE_DISK%d_MODE" % idx] = mode
589
  else:
590
    disk_count = 0
591

    
592
  env["INSTANCE_DISK_COUNT"] = disk_count
593

    
594
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
595
    for key, value in source.items():
596
      env["INSTANCE_%s_%s" % (kind, key)] = value
597

    
598
  return env
599
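
# For illustration (hypothetical values): an instance with one bridged NIC and
# one disk yields, among others, hook environment entries such as
#
#   INSTANCE_NAME=instance1.example.com    INSTANCE_STATUS=up
#   INSTANCE_NIC_COUNT=1                   INSTANCE_NIC0_MODE=bridged
#   INSTANCE_NIC0_BRIDGE=<link>            INSTANCE_DISK_COUNT=1
#   INSTANCE_DISK0_SIZE=<size>             INSTANCE_DISK0_MODE=rw
#
# plus one INSTANCE_BE_<key> and one INSTANCE_HV_<key> entry per backend and
# hypervisor parameter respectively.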

    
600
def _NICListToTuple(lu, nics):
601
  """Build a list of nic information tuples.
602

603
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
604
  value in LUQueryInstanceData.
605

606
  @type lu:  L{LogicalUnit}
607
  @param lu: the logical unit on whose behalf we execute
608
  @type nics: list of L{objects.NIC}
609
  @param nics: list of nics to convert to hooks tuples
610

611
  """
612
  hooks_nics = []
613
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
614
  for nic in nics:
615
    ip = nic.ip
616
    mac = nic.mac
617
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
618
    mode = filled_params[constants.NIC_MODE]
619
    link = filled_params[constants.NIC_LINK]
620
    hooks_nics.append((ip, mac, mode, link))
621
  return hooks_nics
622

    
623
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
624
  """Builds instance related env variables for hooks from an object.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type instance: L{objects.Instance}
629
  @param instance: the instance for which we should build the
630
      environment
631
  @type override: dict
632
  @param override: dictionary with key/values that will override
633
      our values
634
  @rtype: dict
635
  @return: the hook environment dictionary
636

637
  """
638
  cluster = lu.cfg.GetClusterInfo()
639
  bep = cluster.FillBE(instance)
640
  hvp = cluster.FillHV(instance)
641
  args = {
642
    'name': instance.name,
643
    'primary_node': instance.primary_node,
644
    'secondary_nodes': instance.secondary_nodes,
645
    'os_type': instance.os,
646
    'status': instance.admin_up,
647
    'memory': bep[constants.BE_MEMORY],
648
    'vcpus': bep[constants.BE_VCPUS],
649
    'nics': _NICListToTuple(lu, instance.nics),
650
    'disk_template': instance.disk_template,
651
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
652
    'bep': bep,
653
    'hvp': hvp,
654
    'hypervisor_name': instance.hypervisor,
655
  }
656
  if override:
657
    args.update(override)
658
  return _BuildInstanceHookEnv(**args)
659

    
660

    
661
def _AdjustCandidatePool(lu):
662
  """Adjust the candidate pool after node operations.
663

664
  """
665
  mod_list = lu.cfg.MaintainCandidatePool()
666
  if mod_list:
667
    lu.LogInfo("Promoted nodes to master candidate role: %s",
668
               ", ".join(node.name for node in mod_list))
669
    for name in mod_list:
670
      lu.context.ReaddNode(name)
671
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
672
  if mc_now > mc_max:
673
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
674
               (mc_now, mc_max))
675

    
676

    
677
def _CheckNicsBridgesExist(lu, target_nics, target_node,
678
                               profile=constants.PP_DEFAULT):
679
  """Check that the brigdes needed by a list of nics exist.
680

681
  """
682
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
683
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
684
                for nic in target_nics]
685
  brlist = [params[constants.NIC_LINK] for params in paramslist
686
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
687
  if brlist:
688
    result = lu.rpc.call_bridges_exist(target_node, brlist)
689
    result.Raise("Error checking bridges on destination node '%s'" %
690
                 target_node, prereq=True)
691

    
692

    
693
def _CheckInstanceBridgesExist(lu, instance, node=None):
694
  """Check that the brigdes needed by an instance exist.
695

696
  """
697
  if node is None:
698
    node = instance.primary_node
699
  _CheckNicsBridgesExist(lu, instance.nics, node)
700

    
701

    
702
def _GetNodeSecondaryInstances(cfg, node_name):
703
  """Returns secondary instances on a node.
704

705
  """
706
  instances = []
707

    
708
  for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
709
    if node_name in inst.secondary_nodes:
710
      instances.append(inst)
711

    
712
  return instances
713

    
714

    
715
class LUDestroyCluster(NoHooksLU):
716
  """Logical unit for destroying the cluster.
717

718
  """
719
  _OP_REQP = []
720

    
721
  def CheckPrereq(self):
722
    """Check prerequisites.
723

724
    This checks whether the cluster is empty.
725

726
    Any errors are signaled by raising errors.OpPrereqError.
727

728
    """
729
    master = self.cfg.GetMasterNode()
730

    
731
    nodelist = self.cfg.GetNodeList()
732
    if len(nodelist) != 1 or nodelist[0] != master:
733
      raise errors.OpPrereqError("There are still %d node(s) in"
734
                                 " this cluster." % (len(nodelist) - 1))
735
    instancelist = self.cfg.GetInstanceList()
736
    if instancelist:
737
      raise errors.OpPrereqError("There are still %d instance(s) in"
738
                                 " this cluster." % len(instancelist))
739

    
740
  def Exec(self, feedback_fn):
741
    """Destroys the cluster.
742

743
    """
744
    master = self.cfg.GetMasterNode()
745
    result = self.rpc.call_node_stop_master(master, False)
746
    result.Raise("Could not disable the master role")
747
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
748
    utils.CreateBackup(priv_key)
749
    utils.CreateBackup(pub_key)
750
    return master
751

    
752

    
753
class LUVerifyCluster(LogicalUnit):
754
  """Verifies the cluster status.
755

756
  """
757
  HPATH = "cluster-verify"
758
  HTYPE = constants.HTYPE_CLUSTER
759
  _OP_REQP = ["skip_checks"]
760
  REQ_BGL = False
761

    
762
  def ExpandNames(self):
763
    self.needed_locks = {
764
      locking.LEVEL_NODE: locking.ALL_SET,
765
      locking.LEVEL_INSTANCE: locking.ALL_SET,
766
    }
767
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
768

    
769
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
770
                  node_result, feedback_fn, master_files,
771
                  drbd_map, vg_name):
772
    """Run multiple tests against a node.
773

774
    Test list:
775

776
      - compares ganeti version
777
      - checks vg existence and size > 20G
778
      - checks config file checksum
779
      - checks ssh to other nodes
780

781
    @type nodeinfo: L{objects.Node}
782
    @param nodeinfo: the node to check
783
    @param file_list: required list of files
784
    @param local_cksum: dictionary of local files and their checksums
785
    @param node_result: the results from the node
786
    @param feedback_fn: function used to accumulate results
787
    @param master_files: list of files that only masters should have
788
    @param drbd_map: the used drbd minors for this node, in
789
        the form of minor: (instance, must_exist), which correspond to instances
790
        and their running status
791
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
792

793
    """
794
    node = nodeinfo.name
795

    
796
    # main result, node_result should be a non-empty dict
797
    if not node_result or not isinstance(node_result, dict):
798
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
799
      return True
800

    
801
    # compares ganeti version
802
    local_version = constants.PROTOCOL_VERSION
803
    remote_version = node_result.get('version', None)
804
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
805
            len(remote_version) == 2):
806
      feedback_fn("  - ERROR: connection to %s failed" % (node))
807
      return True
808

    
809
    if local_version != remote_version[0]:
810
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
811
                  " node %s %s" % (local_version, node, remote_version[0]))
812
      return True
813

    
814
    # node seems compatible, we can actually try to look into its results
815

    
816
    bad = False
817

    
818
    # full package version
819
    if constants.RELEASE_VERSION != remote_version[1]:
820
      feedback_fn("  - WARNING: software version mismatch: master %s,"
821
                  " node %s %s" %
822
                  (constants.RELEASE_VERSION, node, remote_version[1]))
823

    
824
    # checks vg existence and size > 20G
825
    if vg_name is not None:
826
      vglist = node_result.get(constants.NV_VGLIST, None)
827
      if not vglist:
828
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
829
                        (node,))
830
        bad = True
831
      else:
832
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
833
                                              constants.MIN_VG_SIZE)
834
        if vgstatus:
835
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
836
          bad = True
837

    
838
    # checks config file checksum
839

    
840
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
841
    if not isinstance(remote_cksum, dict):
842
      bad = True
843
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
844
    else:
845
      for file_name in file_list:
846
        node_is_mc = nodeinfo.master_candidate
847
        must_have_file = file_name not in master_files
848
        if file_name not in remote_cksum:
849
          if node_is_mc or must_have_file:
850
            bad = True
851
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
852
        elif remote_cksum[file_name] != local_cksum[file_name]:
853
          if node_is_mc or must_have_file:
854
            bad = True
855
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
856
          else:
857
            # not candidate and this is not a must-have file
858
            bad = True
859
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
860
                        " candidates (and the file is outdated)" % file_name)
861
        else:
862
          # all good, except non-master/non-must have combination
863
          if not node_is_mc and not must_have_file:
864
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
865
                        " candidates" % file_name)
866

    
867
    # checks ssh to any
868

    
869
    if constants.NV_NODELIST not in node_result:
870
      bad = True
871
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
872
    else:
873
      if node_result[constants.NV_NODELIST]:
874
        bad = True
875
        for node in node_result[constants.NV_NODELIST]:
876
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
877
                          (node, node_result[constants.NV_NODELIST][node]))
878

    
879
    if constants.NV_NODENETTEST not in node_result:
880
      bad = True
881
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
882
    else:
883
      if node_result[constants.NV_NODENETTEST]:
884
        bad = True
885
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
886
        for node in nlist:
887
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
888
                          (node, node_result[constants.NV_NODENETTEST][node]))
889

    
890
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
891
    if isinstance(hyp_result, dict):
892
      for hv_name, hv_result in hyp_result.iteritems():
893
        if hv_result is not None:
894
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
895
                      (hv_name, hv_result))
896

    
897
    # check used drbd list
898
    if vg_name is not None:
899
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
900
      if not isinstance(used_minors, (tuple, list)):
901
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
902
                    str(used_minors))
903
      else:
904
        for minor, (iname, must_exist) in drbd_map.items():
905
          if minor not in used_minors and must_exist:
906
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
907
                        " not active" % (minor, iname))
908
            bad = True
909
        for minor in used_minors:
910
          if minor not in drbd_map:
911
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
912
                        minor)
913
            bad = True
914

    
915
    return bad
916

    
917
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
918
                      node_instance, feedback_fn, n_offline):
919
    """Verify an instance.
920

921
    This function checks to see if the required block devices are
922
    available on the instance's node.
923

924
    """
925
    bad = False
926

    
927
    node_current = instanceconfig.primary_node
928

    
929
    node_vol_should = {}
930
    instanceconfig.MapLVsByNode(node_vol_should)
931

    
932
    for node in node_vol_should:
933
      if node in n_offline:
934
        # ignore missing volumes on offline nodes
935
        continue
936
      for volume in node_vol_should[node]:
937
        if node not in node_vol_is or volume not in node_vol_is[node]:
938
          feedback_fn("  - ERROR: volume %s missing on node %s" %
939
                          (volume, node))
940
          bad = True
941

    
942
    if instanceconfig.admin_up:
943
      if ((node_current not in node_instance or
944
          not instance in node_instance[node_current]) and
945
          node_current not in n_offline):
946
        feedback_fn("  - ERROR: instance %s not running on node %s" %
947
                        (instance, node_current))
948
        bad = True
949

    
950
    for node in node_instance:
951
      if (not node == node_current):
952
        if instance in node_instance[node]:
953
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
954
                          (instance, node))
955
          bad = True
956

    
957
    return bad
958

    
959
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
960
    """Verify if there are any unknown volumes in the cluster.
961

962
    The .os, .swap and backup volumes are ignored. All other volumes are
963
    reported as unknown.
964

965
    """
966
    bad = False
967

    
968
    for node in node_vol_is:
969
      for volume in node_vol_is[node]:
970
        if node not in node_vol_should or volume not in node_vol_should[node]:
971
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
972
                      (volume, node))
973
          bad = True
974
    return bad
975

    
976
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
977
    """Verify the list of running instances.
978

979
    This checks what instances are running but unknown to the cluster.
980

981
    """
982
    bad = False
983
    for node in node_instance:
984
      for runninginstance in node_instance[node]:
985
        if runninginstance not in instancelist:
986
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
987
                          (runninginstance, node))
988
          bad = True
989
    return bad
990

    
991
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
992
    """Verify N+1 Memory Resilience.
993

994
    Check that if one single node dies we can still start all the instances it
995
    was primary for.
996

997
    """
998
    bad = False
999

    
1000
    for node, nodeinfo in node_info.iteritems():
1001
      # This code checks that every node which is now listed as secondary has
1002
      # enough memory to host all instances it is secondary for, should a single
1003
      # other node in the cluster fail.
1004
      # FIXME: not ready for failover to an arbitrary node
1005
      # FIXME: does not support file-backed instances
1006
      # WARNING: we currently take into account down instances as well as up
1007
      # ones, considering that even if they're down someone might want to start
1008
      # them even in the event of a node failure.
1009
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1010
        needed_mem = 0
1011
        for instance in instances:
1012
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1013
          if bep[constants.BE_AUTO_BALANCE]:
1014
            needed_mem += bep[constants.BE_MEMORY]
1015
        if nodeinfo['mfree'] < needed_mem:
1016
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
1017
                      " failovers should node %s fail" % (node, prinode))
1018
          bad = True
1019
    return bad
1020
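
  # Worked example (hypothetical numbers): if node A reports mfree=4096 MiB
  # and is secondary for instances X (BE_MEMORY=2048) and Y (BE_MEMORY=3072),
  # both auto-balanced and with node B as primary, a failure of B would need
  # 2048 + 3072 = 5120 MiB on A; since 5120 > 4096, _VerifyNPlusOneMemory
  # reports an N+1 error for the pair (A, B).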

    
1021
  def CheckPrereq(self):
1022
    """Check prerequisites.
1023

1024
    Transform the list of checks we're going to skip into a set and check that
1025
    all its members are valid.
1026

1027
    """
1028
    self.skip_set = frozenset(self.op.skip_checks)
1029
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1030
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1031

    
1032
  def BuildHooksEnv(self):
1033
    """Build hooks env.
1034

1035
    Cluster-Verify hooks are run only in the post phase; if they fail, their
1036
    output is logged in the verify output and the verification fails.
1037

1038
    """
1039
    all_nodes = self.cfg.GetNodeList()
1040
    env = {
1041
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1042
      }
1043
    for node in self.cfg.GetAllNodesInfo().values():
1044
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1045

    
1046
    return env, [], all_nodes
1047

    
1048
  def Exec(self, feedback_fn):
1049
    """Verify integrity of cluster, performing various test on nodes.
1050

1051
    """
1052
    bad = False
1053
    feedback_fn("* Verifying global settings")
1054
    for msg in self.cfg.VerifyConfig():
1055
      feedback_fn("  - ERROR: %s" % msg)
1056

    
1057
    vg_name = self.cfg.GetVGName()
1058
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1059
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1060
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1061
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1062
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1063
                        for iname in instancelist)
1064
    i_non_redundant = [] # Non redundant instances
1065
    i_non_a_balanced = [] # Non auto-balanced instances
1066
    n_offline = [] # List of offline nodes
1067
    n_drained = [] # List of nodes being drained
1068
    node_volume = {}
1069
    node_instance = {}
1070
    node_info = {}
1071
    instance_cfg = {}
1072

    
1073
    # FIXME: verify OS list
1074
    # do local checksums
1075
    master_files = [constants.CLUSTER_CONF_FILE]
1076

    
1077
    file_names = ssconf.SimpleStore().GetFileList()
1078
    file_names.append(constants.SSL_CERT_FILE)
1079
    file_names.append(constants.RAPI_CERT_FILE)
1080
    file_names.extend(master_files)
1081

    
1082
    local_checksums = utils.FingerprintFiles(file_names)
1083

    
1084
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1085
    node_verify_param = {
1086
      constants.NV_FILELIST: file_names,
1087
      constants.NV_NODELIST: [node.name for node in nodeinfo
1088
                              if not node.offline],
1089
      constants.NV_HYPERVISOR: hypervisors,
1090
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1091
                                  node.secondary_ip) for node in nodeinfo
1092
                                 if not node.offline],
1093
      constants.NV_INSTANCELIST: hypervisors,
1094
      constants.NV_VERSION: None,
1095
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1096
      }
1097
    if vg_name is not None:
1098
      node_verify_param[constants.NV_VGLIST] = None
1099
      node_verify_param[constants.NV_LVLIST] = vg_name
1100
      node_verify_param[constants.NV_DRBDLIST] = None
1101
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1102
                                           self.cfg.GetClusterName())
1103

    
1104
    cluster = self.cfg.GetClusterInfo()
1105
    master_node = self.cfg.GetMasterNode()
1106
    all_drbd_map = self.cfg.ComputeDRBDMap()
1107

    
1108
    for node_i in nodeinfo:
1109
      node = node_i.name
1110

    
1111
      if node_i.offline:
1112
        feedback_fn("* Skipping offline node %s" % (node,))
1113
        n_offline.append(node)
1114
        continue
1115

    
1116
      if node == master_node:
1117
        ntype = "master"
1118
      elif node_i.master_candidate:
1119
        ntype = "master candidate"
1120
      elif node_i.drained:
1121
        ntype = "drained"
1122
        n_drained.append(node)
1123
      else:
1124
        ntype = "regular"
1125
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1126

    
1127
      msg = all_nvinfo[node].fail_msg
1128
      if msg:
1129
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1130
        bad = True
1131
        continue
1132

    
1133
      nresult = all_nvinfo[node].payload
1134
      node_drbd = {}
1135
      for minor, instance in all_drbd_map[node].items():
1136
        if instance not in instanceinfo:
1137
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1138
                      instance)
1139
          # ghost instance should not be running, but otherwise we
1140
          # don't give double warnings (both ghost instance and
1141
          # unallocated minor in use)
1142
          node_drbd[minor] = (instance, False)
1143
        else:
1144
          instance = instanceinfo[instance]
1145
          node_drbd[minor] = (instance.name, instance.admin_up)
1146
      result = self._VerifyNode(node_i, file_names, local_checksums,
1147
                                nresult, feedback_fn, master_files,
1148
                                node_drbd, vg_name)
1149
      bad = bad or result
1150

    
1151
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1152
      if vg_name is None:
1153
        node_volume[node] = {}
1154
      elif isinstance(lvdata, basestring):
1155
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1156
                    (node, utils.SafeEncode(lvdata)))
1157
        bad = True
1158
        node_volume[node] = {}
1159
      elif not isinstance(lvdata, dict):
1160
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1161
        bad = True
1162
        continue
1163
      else:
1164
        node_volume[node] = lvdata
1165

    
1166
      # node_instance
1167
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1168
      if not isinstance(idata, list):
1169
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1170
                    (node,))
1171
        bad = True
1172
        continue
1173

    
1174
      node_instance[node] = idata
1175

    
1176
      # node_info
1177
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1178
      if not isinstance(nodeinfo, dict):
1179
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1180
        bad = True
1181
        continue
1182

    
1183
      try:
1184
        node_info[node] = {
1185
          "mfree": int(nodeinfo['memory_free']),
1186
          "pinst": [],
1187
          "sinst": [],
1188
          # dictionary holding all instances this node is secondary for,
1189
          # grouped by their primary node. Each key is a cluster node, and each
1190
          # value is a list of instances which have the key as primary and the
1191
          # current node as secondary. This is handy to calculate N+1 memory
1192
          # availability if you can only failover from a primary to its
1193
          # secondary.
1194
          "sinst-by-pnode": {},
1195
        }
1196
        # FIXME: devise a free space model for file based instances as well
1197
        if vg_name is not None:
1198
          if (constants.NV_VGLIST not in nresult or
1199
              vg_name not in nresult[constants.NV_VGLIST]):
1200
            feedback_fn("  - ERROR: node %s didn't return data for the"
1201
                        " volume group '%s' - it is either missing or broken" %
1202
                        (node, vg_name))
1203
            bad = True
1204
            continue
1205
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1206
      except (ValueError, KeyError):
1207
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1208
                    " from node %s" % (node,))
1209
        bad = True
1210
        continue
1211

    
1212
    node_vol_should = {}
1213

    
1214
    for instance in instancelist:
1215
      feedback_fn("* Verifying instance %s" % instance)
1216
      inst_config = instanceinfo[instance]
1217
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1218
                                     node_instance, feedback_fn, n_offline)
1219
      bad = bad or result
1220
      inst_nodes_offline = []
1221

    
1222
      inst_config.MapLVsByNode(node_vol_should)
1223

    
1224
      instance_cfg[instance] = inst_config
1225

    
1226
      pnode = inst_config.primary_node
1227
      if pnode in node_info:
1228
        node_info[pnode]['pinst'].append(instance)
1229
      elif pnode not in n_offline:
1230
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1231
                    " %s failed" % (instance, pnode))
1232
        bad = True
1233

    
1234
      if pnode in n_offline:
1235
        inst_nodes_offline.append(pnode)
1236

    
1237
      # If the instance is non-redundant we cannot survive losing its primary
1238
      # node, so we are not N+1 compliant. On the other hand we have no disk
1239
      # templates with more than one secondary so that situation is not well
1240
      # supported either.
1241
      # FIXME: does not support file-backed instances
1242
      if len(inst_config.secondary_nodes) == 0:
1243
        i_non_redundant.append(instance)
1244
      elif len(inst_config.secondary_nodes) > 1:
1245
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1246
                    % instance)
1247

    
1248
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1249
        i_non_a_balanced.append(instance)
1250

    
1251
      for snode in inst_config.secondary_nodes:
1252
        if snode in node_info:
1253
          node_info[snode]['sinst'].append(instance)
1254
          if pnode not in node_info[snode]['sinst-by-pnode']:
1255
            node_info[snode]['sinst-by-pnode'][pnode] = []
1256
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1257
        elif snode not in n_offline:
1258
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1259
                      " %s failed" % (instance, snode))
1260
          bad = True
1261
        if snode in n_offline:
1262
          inst_nodes_offline.append(snode)
1263

    
1264
      if inst_nodes_offline:
1265
        # warn that the instance lives on offline nodes, and set bad=True
1266
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1267
                    ", ".join(inst_nodes_offline))
1268
        bad = True
1269

    
1270
    feedback_fn("* Verifying orphan volumes")
1271
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1272
                                       feedback_fn)
1273
    bad = bad or result
1274

    
1275
    feedback_fn("* Verifying remaining instances")
1276
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1277
                                         feedback_fn)
1278
    bad = bad or result
1279

    
1280
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1281
      feedback_fn("* Verifying N+1 Memory redundancy")
1282
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1283
      bad = bad or result
1284

    
1285
    feedback_fn("* Other Notes")
1286
    if i_non_redundant:
1287
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1288
                  % len(i_non_redundant))
1289

    
1290
    if i_non_a_balanced:
1291
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1292
                  % len(i_non_a_balanced))
1293

    
1294
    if n_offline:
1295
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1296

    
1297
    if n_drained:
1298
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1299

    
1300
    return not bad
1301

    
1302
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1303
    """Analyze the post-hooks' result
1304

1305
    This method analyses the hook result, handles it, and sends some
1306
    nicely-formatted feedback back to the user.
1307

1308
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1309
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1310
    @param hooks_results: the results of the multi-node hooks rpc call
1311
    @param feedback_fn: function used to send feedback back to the caller
1312
    @param lu_result: previous Exec result
1313
    @return: the new Exec result, based on the previous result
1314
        and hook results
1315

1316
    """
1317
    # We only really run POST phase hooks, and are only interested in
1318
    # their results
1319
    if phase == constants.HOOKS_PHASE_POST:
1320
      # Used to change hooks' output to proper indentation
1321
      indent_re = re.compile('^', re.M)
1322
      feedback_fn("* Hooks Results")
1323
      if not hooks_results:
1324
        feedback_fn("  - ERROR: general communication failure")
1325
        lu_result = 1
1326
      else:
1327
        for node_name in hooks_results:
1328
          show_node_header = True
1329
          res = hooks_results[node_name]
1330
          msg = res.fail_msg
1331
          if msg:
1332
            if res.offline:
1333
              # no need to warn or set fail return value
1334
              continue
1335
            feedback_fn("    Communication failure in hooks execution: %s" %
1336
                        msg)
1337
            lu_result = 1
1338
            continue
1339
          for script, hkr, output in res.payload:
1340
            if hkr == constants.HKR_FAIL:
1341
              # The node header is only shown once, if there are
1342
              # failing hooks on that node
1343
              if show_node_header:
1344
                feedback_fn("  Node %s:" % node_name)
1345
                show_node_header = False
1346
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1347
              output = indent_re.sub('      ', output)
1348
              feedback_fn("%s" % output)
1349
              lu_result = 1
1350

    
1351
      return lu_result
1352

    
1353

    
1354
class LUVerifyDisks(NoHooksLU):
1355
  """Verifies the cluster disks status.
1356

1357
  """
1358
  _OP_REQP = []
1359
  REQ_BGL = False
1360

    
1361
  def ExpandNames(self):
1362
    self.needed_locks = {
1363
      locking.LEVEL_NODE: locking.ALL_SET,
1364
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1365
    }
1366
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1367

    
1368
  def CheckPrereq(self):
1369
    """Check prerequisites.
1370

1371
    This has no prerequisites.
1372

1373
    """
1374
    pass
1375

    
1376
  def Exec(self, feedback_fn):
1377
    """Verify integrity of cluster disks.
1378

1379
    @rtype: tuple of three items
1380
    @return: a tuple of (dict of node-to-node_error, list of instances
1381
        which need activate-disks, dict of instance: (node, volume) for
1382
        missing volumes)
1383

1384
    """
1385
    result = res_nodes, res_instances, res_missing = {}, [], {}
1386

    
1387
    vg_name = self.cfg.GetVGName()
1388
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1389
    instances = [self.cfg.GetInstanceInfo(name)
1390
                 for name in self.cfg.GetInstanceList()]
1391

    
1392
    nv_dict = {}
1393
    for inst in instances:
1394
      inst_lvs = {}
1395
      if (not inst.admin_up or
1396
          inst.disk_template not in constants.DTS_NET_MIRROR):
1397
        continue
1398
      inst.MapLVsByNode(inst_lvs)
1399
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1400
      for node, vol_list in inst_lvs.iteritems():
1401
        for vol in vol_list:
1402
          nv_dict[(node, vol)] = inst
1403

    
1404
    if not nv_dict:
1405
      return result
1406

    
1407
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1408

    
1409
    for node in nodes:
1410
      # node_volume
1411
      node_res = node_lvs[node]
1412
      if node_res.offline:
1413
        continue
1414
      msg = node_res.fail_msg
1415
      if msg:
1416
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1417
        res_nodes[node] = msg
1418
        continue
1419

    
1420
      lvs = node_res.payload
1421
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1422
        inst = nv_dict.pop((node, lv_name), None)
1423
        if (not lv_online and inst is not None
1424
            and inst.name not in res_instances):
1425
          res_instances.append(inst.name)
1426

    
1427
    # any leftover items in nv_dict are missing LVs, let's arrange the
1428
    # data better
1429
    for key, inst in nv_dict.iteritems():
1430
      if inst.name not in res_missing:
1431
        res_missing[inst.name] = []
1432
      res_missing[inst.name].append(key)
1433

    
1434
    return result
1435
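
# For illustration (hypothetical values), the tuple returned by
# LUVerifyDisks.Exec might look like:
#
#   ({"node3.example.com": "Error while getting LV information: ..."},
#    ["instance1.example.com"],                     # need activate-disks
#    {"instance2.example.com": [("node2.example.com", "some-lv-name")]})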

    
1436

    
1437
class LURenameCluster(LogicalUnit):
1438
  """Rename the cluster.
1439

1440
  """
1441
  HPATH = "cluster-rename"
1442
  HTYPE = constants.HTYPE_CLUSTER
1443
  _OP_REQP = ["name"]
1444

    
1445
  def BuildHooksEnv(self):
1446
    """Build hooks env.
1447

1448
    """
1449
    env = {
1450
      "OP_TARGET": self.cfg.GetClusterName(),
1451
      "NEW_NAME": self.op.name,
1452
      }
1453
    mn = self.cfg.GetMasterNode()
1454
    return env, [mn], [mn]
1455

    
1456
  def CheckPrereq(self):
1457
    """Verify that the passed name is a valid one.
1458

1459
    """
1460
    hostname = utils.HostInfo(self.op.name)
1461

    
1462
    new_name = hostname.name
1463
    self.ip = new_ip = hostname.ip
1464
    old_name = self.cfg.GetClusterName()
1465
    old_ip = self.cfg.GetMasterIP()
1466
    if new_name == old_name and new_ip == old_ip:
1467
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1468
                                 " cluster has changed")
1469
    if new_ip != old_ip:
1470
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1471
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1472
                                   " reachable on the network. Aborting." %
1473
                                   new_ip)
1474

    
1475
    self.op.name = new_name
1476

    
1477
  def Exec(self, feedback_fn):
1478
    """Rename the cluster.
1479

1480
    """
1481
    clustername = self.op.name
1482
    ip = self.ip
1483

    
1484
    # shutdown the master IP
1485
    master = self.cfg.GetMasterNode()
1486
    result = self.rpc.call_node_stop_master(master, False)
1487
    result.Raise("Could not disable the master role")
1488

    
1489
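    # keep the master IP down while the cluster name/IP and known_hosts
    # file are updated; the finally clause below restarts the master role
    # even if the update fails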
    try:
1490
      cluster = self.cfg.GetClusterInfo()
1491
      cluster.cluster_name = clustername
1492
      cluster.master_ip = ip
1493
      self.cfg.Update(cluster)
1494

    
1495
      # update the known hosts file
1496
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1497
      node_list = self.cfg.GetNodeList()
1498
      try:
1499
        node_list.remove(master)
1500
      except ValueError:
1501
        pass
1502
      result = self.rpc.call_upload_file(node_list,
1503
                                         constants.SSH_KNOWN_HOSTS_FILE)
1504
      for to_node, to_result in result.iteritems():
1505
        msg = to_result.fail_msg
1506
        if msg:
1507
          msg = ("Copy of file %s to node %s failed: %s" %
1508
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1509
          self.proc.LogWarning(msg)
1510

    
1511
    finally:
1512
      result = self.rpc.call_node_start_master(master, False, False)
1513
      msg = result.fail_msg
1514
      if msg:
1515
        self.LogWarning("Could not re-enable the master role on"
1516
                        " the master, please restart manually: %s", msg)
1517

    
1518

    
1519
def _RecursiveCheckIfLVMBased(disk):
1520
  """Check if the given disk or its children are lvm-based.
1521

1522
  @type disk: L{objects.Disk}
1523
  @param disk: the disk to check
1524
  @rtype: boolean
1525
  @return: boolean indicating whether an LD_LV dev_type was found or not
1526

1527
  """
1528
  if disk.children:
1529
    for chdisk in disk.children:
1530
      if _RecursiveCheckIfLVMBased(chdisk):
1531
        return True
1532
  return disk.dev_type == constants.LD_LV
1533

    
1534

    
1535
class LUSetClusterParams(LogicalUnit):
1536
  """Change the parameters of the cluster.
1537

1538
  """
1539
  HPATH = "cluster-modify"
1540
  HTYPE = constants.HTYPE_CLUSTER
1541
  _OP_REQP = []
1542
  REQ_BGL = False
1543

    
1544
  def CheckArguments(self):
1545
    """Check parameters
1546

1547
    """
1548
    if not hasattr(self.op, "candidate_pool_size"):
1549
      self.op.candidate_pool_size = None
1550
    if self.op.candidate_pool_size is not None:
1551
      try:
1552
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1553
      except (ValueError, TypeError), err:
1554
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1555
                                   str(err))
1556
      if self.op.candidate_pool_size < 1:
1557
        raise errors.OpPrereqError("At least one master candidate needed")
1558

    
1559
  def ExpandNames(self):
1560
    # FIXME: in the future maybe other cluster params won't require checking on
1561
    # all nodes to be modified.
1562
    self.needed_locks = {
1563
      locking.LEVEL_NODE: locking.ALL_SET,
1564
    }
1565
    self.share_locks[locking.LEVEL_NODE] = 1
1566

    
1567
  def BuildHooksEnv(self):
1568
    """Build hooks env.
1569

1570
    """
1571
    env = {
1572
      "OP_TARGET": self.cfg.GetClusterName(),
1573
      "NEW_VG_NAME": self.op.vg_name,
1574
      }
1575
    mn = self.cfg.GetMasterNode()
1576
    return env, [mn], [mn]
1577

    
1578
  def CheckPrereq(self):
1579
    """Check prerequisites.
1580

1581
    This checks that the given parameters do not conflict and
1582
    that the given volume group is valid.
1583

1584
    """
1585
    if self.op.vg_name is not None and not self.op.vg_name:
1586
      instances = self.cfg.GetAllInstancesInfo().values()
1587
      for inst in instances:
1588
        for disk in inst.disks:
1589
          if _RecursiveCheckIfLVMBased(disk):
1590
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1591
                                       " lvm-based instances exist")
1592

    
1593
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1594

    
1595
    # if vg_name not None, checks given volume group on all nodes
1596
    if self.op.vg_name:
1597
      vglist = self.rpc.call_vg_list(node_list)
1598
      for node in node_list:
1599
        msg = vglist[node].fail_msg
1600
        if msg:
1601
          # ignoring down node
1602
          self.LogWarning("Error while gathering data on node %s"
1603
                          " (ignoring node): %s", node, msg)
1604
          continue
1605
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1606
                                              self.op.vg_name,
1607
                                              constants.MIN_VG_SIZE)
1608
        if vgstatus:
1609
          raise errors.OpPrereqError("Error on node '%s': %s" %
1610
                                     (node, vgstatus))
1611

    
1612
    self.cluster = cluster = self.cfg.GetClusterInfo()
1613
    # validate params changes
1614
    if self.op.beparams:
1615
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1616
      self.new_beparams = objects.FillDict(
1617
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1618

    
1619
    if self.op.nicparams:
1620
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1621
      self.new_nicparams = objects.FillDict(
1622
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1623
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1624

    
1625
    # hypervisor list/parameters
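    # start from a copy of the current hypervisor parameters and merge any
    # per-hypervisor changes from the opcode on top of it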
1626
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1627
    if self.op.hvparams:
1628
      if not isinstance(self.op.hvparams, dict):
1629
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1630
      for hv_name, hv_dict in self.op.hvparams.items():
1631
        if hv_name not in self.new_hvparams:
1632
          self.new_hvparams[hv_name] = hv_dict
1633
        else:
1634
          self.new_hvparams[hv_name].update(hv_dict)
1635

    
1636
    if self.op.enabled_hypervisors is not None:
1637
      self.hv_list = self.op.enabled_hypervisors
1638
      if not self.hv_list:
1639
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1640
                                   " least one member")
1641
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1642
      if invalid_hvs:
1643
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1644
                                   " entries: %s" % invalid_hvs)
1645
    else:
1646
      self.hv_list = cluster.enabled_hypervisors
1647

    
1648
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1649
      # either the enabled list has changed, or the parameters have, validate
1650
      for hv_name, hv_params in self.new_hvparams.items():
1651
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1652
            (self.op.enabled_hypervisors and
1653
             hv_name in self.op.enabled_hypervisors)):
1654
          # either this is a new hypervisor, or its parameters have changed
1655
          hv_class = hypervisor.GetHypervisor(hv_name)
1656
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1657
          hv_class.CheckParameterSyntax(hv_params)
1658
          _CheckHVParams(self, node_list, hv_name, hv_params)
1659

    
1660
  def Exec(self, feedback_fn):
1661
    """Change the parameters of the cluster.
1662

1663
    """
1664
    if self.op.vg_name is not None:
1665
      new_volume = self.op.vg_name
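      # an empty vg_name means "disable LVM storage" and is stored as None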
1666
      if not new_volume:
1667
        new_volume = None
1668
      if new_volume != self.cfg.GetVGName():
1669
        self.cfg.SetVGName(new_volume)
1670
      else:
1671
        feedback_fn("Cluster LVM configuration already in desired"
1672
                    " state, not changing")
1673
    if self.op.hvparams:
1674
      self.cluster.hvparams = self.new_hvparams
1675
    if self.op.enabled_hypervisors is not None:
1676
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1677
    if self.op.beparams:
1678
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1679
    if self.op.nicparams:
1680
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1681

    
1682
    if self.op.candidate_pool_size is not None:
1683
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1684
      # we need to update the pool size here, otherwise the save will fail
1685
      _AdjustCandidatePool(self)
1686

    
1687
    self.cfg.Update(self.cluster)
1688

    
1689

    
1690
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1691
  """Distribute additional files which are part of the cluster configuration.
1692

1693
  ConfigWriter takes care of distributing the config and ssconf files, but
1694
  there are more files which should be distributed to all nodes. This function
1695
  makes sure those are copied.
1696

1697
  @param lu: calling logical unit
1698
  @param additional_nodes: list of nodes not in the config to distribute to
1699

1700
  """
1701
  # 1. Gather target nodes
1702
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1703
  dist_nodes = lu.cfg.GetNodeList()
1704
  if additional_nodes is not None:
1705
    dist_nodes.extend(additional_nodes)
1706
  if myself.name in dist_nodes:
1707
    dist_nodes.remove(myself.name)
1708
  # 2. Gather files to distribute
1709
  dist_files = set([constants.ETC_HOSTS,
1710
                    constants.SSH_KNOWN_HOSTS_FILE,
1711
                    constants.RAPI_CERT_FILE,
1712
                    constants.RAPI_USERS_FILE,
1713
                    constants.HMAC_CLUSTER_KEY,
1714
                   ])
1715

    
1716
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1717
  for hv_name in enabled_hypervisors:
1718
    hv_class = hypervisor.GetHypervisor(hv_name)
1719
    dist_files.update(hv_class.GetAncillaryFiles())
1720

    
1721
  # 3. Perform the files upload
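  # files that do not exist on the master (e.g. the optional RAPI users
  # file) are silently skipped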
1722
  for fname in dist_files:
1723
    if os.path.exists(fname):
1724
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1725
      for to_node, to_result in result.items():
1726
        msg = to_result.fail_msg
1727
        if msg:
1728
          msg = ("Copy of file %s to node %s failed: %s" %
1729
                 (fname, to_node, msg))
1730
          lu.proc.LogWarning(msg)
1731

    
1732

    
1733
class LURedistributeConfig(NoHooksLU):
1734
  """Force the redistribution of cluster configuration.
1735

1736
  This is a very simple LU.
1737

1738
  """
1739
  _OP_REQP = []
1740
  REQ_BGL = False
1741

    
1742
  def ExpandNames(self):
1743
    self.needed_locks = {
1744
      locking.LEVEL_NODE: locking.ALL_SET,
1745
    }
1746
    self.share_locks[locking.LEVEL_NODE] = 1
1747

    
1748
  def CheckPrereq(self):
1749
    """Check prerequisites.
1750

1751
    """
1752

    
1753
  def Exec(self, feedback_fn):
1754
    """Redistribute the configuration.
1755

1756
    """
1757
    self.cfg.Update(self.cfg.GetClusterInfo())
1758
    _RedistributeAncillaryFiles(self)
1759

    
1760

    
1761
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1762
  """Sleep and poll for an instance's disk to sync.
1763

1764
  """
1765
  if not instance.disks:
1766
    return True
1767

    
1768
  if not oneshot:
1769
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1770

    
1771
  node = instance.primary_node
1772

    
1773
  for dev in instance.disks:
1774
    lu.cfg.SetDiskID(dev, node)
1775

    
1776
  retries = 0
1777
  degr_retries = 10 # in seconds, as we sleep 1 second each time
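  # consecutive RPC failures when polling the primary node are retried
  # every 6 seconds; after 10 failures in a row the operation aborts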
1778
  while True:
1779
    max_time = 0
1780
    done = True
1781
    cumul_degraded = False
1782
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1783
    msg = rstats.fail_msg
1784
    if msg:
1785
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1786
      retries += 1
1787
      if retries >= 10:
1788
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1789
                                 " aborting." % node)
1790
      time.sleep(6)
1791
      continue
1792
    rstats = rstats.payload
1793
    retries = 0
1794
    for i, mstat in enumerate(rstats):
1795
      if mstat is None:
1796
        lu.LogWarning("Can't compute data for node %s/%s",
1797
                           node, instance.disks[i].iv_name)
1798
        continue
1799
      # we ignore the ldisk parameter
1800
      perc_done, est_time, is_degraded, _ = mstat
1801
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1802
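      # a non-None sync percentage means this mirror is still resyncing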
      if perc_done is not None:
1803
        done = False
1804
        if est_time is not None:
1805
          rem_time = "%d estimated seconds remaining" % est_time
1806
          max_time = est_time
1807
        else:
1808
          rem_time = "no time estimate"
1809
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1810
                        (instance.disks[i].iv_name, perc_done, rem_time))
1811

    
1812
    # if we're done but degraded, let's do a few small retries, to
1813
    # make sure we see a stable and not transient situation; therefore
1814
    # we force restart of the loop
1815
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1816
      logging.info("Degraded disks found, %d retries left", degr_retries)
1817
      degr_retries -= 1
1818
      time.sleep(1)
1819
      continue
1820

    
1821
    if done or oneshot:
1822
      break
1823

    
1824
    time.sleep(min(60, max_time))
1825

    
1826
  if done:
1827
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1828
  return not cumul_degraded
1829

    
1830

    
1831
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1832
  """Check that mirrors are not degraded.
1833

1834
  The ldisk parameter, if True, will change the test from the
1835
  is_degraded attribute (which represents overall non-ok status for
1836
  the device(s)) to the ldisk (representing the local storage status).
1837

1838
  """
1839
  lu.cfg.SetDiskID(dev, node)
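  # in the blockdev_find status tuple, index 5 is the overall is_degraded
  # flag and index 6 the local-disk (ldisk) status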
1840
  if ldisk:
1841
    idx = 6
1842
  else:
1843
    idx = 5
1844

    
1845
  result = True
1846
  if on_primary or dev.AssembleOnSecondary():
1847
    rstats = lu.rpc.call_blockdev_find(node, dev)
1848
    msg = rstats.fail_msg
1849
    if msg:
1850
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1851
      result = False
1852
    elif not rstats.payload:
1853
      lu.LogWarning("Can't find disk on node %s", node)
1854
      result = False
1855
    else:
1856
      result = result and (not rstats.payload[idx])
1857
  if dev.children:
1858
    for child in dev.children:
1859
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1860

    
1861
  return result
1862

    
1863

    
1864
class LUDiagnoseOS(NoHooksLU):
1865
  """Logical unit for OS diagnose/query.
1866

1867
  """
1868
  _OP_REQP = ["output_fields", "names"]
1869
  REQ_BGL = False
1870
  _FIELDS_STATIC = utils.FieldSet()
1871
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1872

    
1873
  def ExpandNames(self):
1874
    if self.op.names:
1875
      raise errors.OpPrereqError("Selective OS query not supported")
1876

    
1877
    _CheckOutputFields(static=self._FIELDS_STATIC,
1878
                       dynamic=self._FIELDS_DYNAMIC,
1879
                       selected=self.op.output_fields)
1880

    
1881
    # Lock all nodes, in shared mode
1882
    # Temporary removal of locks, should be reverted later
1883
    # TODO: reintroduce locks when they are lighter-weight
1884
    self.needed_locks = {}
1885
    #self.share_locks[locking.LEVEL_NODE] = 1
1886
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1887

    
1888
  def CheckPrereq(self):
1889
    """Check prerequisites.
1890

1891
    """
1892

    
1893
  @staticmethod
1894
  def _DiagnoseByOS(node_list, rlist):
1895
    """Remaps a per-node return list into an a per-os per-node dictionary
1896

1897
    @param node_list: a list with the names of all nodes
1898
    @param rlist: a map with node names as keys and OS objects as values
1899

1900
    @rtype: dict
1901
    @return: a dictionary with osnames as keys and as value another map, with
1902
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
1903

1904
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
1905
                                     (/srv/..., False, "invalid api")],
1906
                           "node2": [(/srv/..., True, "")]}
1907
          }
1908

1909
    """
1910
    all_os = {}
1911
    # we build here the list of nodes that didn't fail the RPC (at RPC
1912
    # level), so that nodes with a non-responding node daemon don't
1913
    # make all OSes invalid
1914
    good_nodes = [node_name for node_name in rlist
1915
                  if not rlist[node_name].fail_msg]
1916
    for node_name, nr in rlist.items():
1917
      if nr.fail_msg or not nr.payload:
1918
        continue
1919
      for name, path, status, diagnose in nr.payload:
1920
        if name not in all_os:
1921
          # build a list of nodes for this os containing empty lists
1922
          # for each node in node_list
1923
          all_os[name] = {}
1924
          for nname in good_nodes:
1925
            all_os[name][nname] = []
1926
        all_os[name][node_name].append((path, status, diagnose))
1927
    return all_os
1928

    
1929
  def Exec(self, feedback_fn):
1930
    """Compute the list of OSes.
1931

1932
    """
1933
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1934
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1935
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1936
    output = []
1937
    for os_name, os_data in pol.items():
1938
      row = []
1939
      for field in self.op.output_fields:
1940
        if field == "name":
1941
          val = os_name
1942
        elif field == "valid":
1943
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
1944
        elif field == "node_status":
1945
          # this is just a copy of the dict
1946
          val = {}
1947
          for node_name, nos_list in os_data.items():
1948
            val[node_name] = nos_list
1949
        else:
1950
          raise errors.ParameterError(field)
1951
        row.append(val)
1952
      output.append(row)
1953

    
1954
    return output
1955

    
1956

    
1957
class LURemoveNode(LogicalUnit):
1958
  """Logical unit for removing a node.
1959

1960
  """
1961
  HPATH = "node-remove"
1962
  HTYPE = constants.HTYPE_NODE
1963
  _OP_REQP = ["node_name"]
1964

    
1965
  def BuildHooksEnv(self):
1966
    """Build hooks env.
1967

1968
    This doesn't run on the target node in the pre phase as a failed
1969
    node would then be impossible to remove.
1970

1971
    """
1972
    env = {
1973
      "OP_TARGET": self.op.node_name,
1974
      "NODE_NAME": self.op.node_name,
1975
      }
1976
    all_nodes = self.cfg.GetNodeList()
1977
    all_nodes.remove(self.op.node_name)
1978
    return env, all_nodes, all_nodes
1979

    
1980
  def CheckPrereq(self):
1981
    """Check prerequisites.
1982

1983
    This checks:
1984
     - the node exists in the configuration
1985
     - it does not have primary or secondary instances
1986
     - it's not the master
1987

1988
    Any errors are signaled by raising errors.OpPrereqError.
1989

1990
    """
1991
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1992
    if node is None:
1993
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1994

    
1995
    instance_list = self.cfg.GetInstanceList()
1996

    
1997
    masternode = self.cfg.GetMasterNode()
1998
    if node.name == masternode:
1999
      raise errors.OpPrereqError("Node is the master node,"
2000
                                 " you need to failover first.")
2001

    
2002
    for instance_name in instance_list:
2003
      instance = self.cfg.GetInstanceInfo(instance_name)
2004
      if node.name in instance.all_nodes:
2005
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2006
                                   " please remove first." % instance_name)
2007
    self.op.node_name = node.name
2008
    self.node = node
2009

    
2010
  def Exec(self, feedback_fn):
2011
    """Removes the node from the cluster.
2012

2013
    """
2014
    node = self.node
2015
    logging.info("Stopping the node daemon and removing configs from node %s",
2016
                 node.name)
2017

    
2018
    self.context.RemoveNode(node.name)
2019

    
2020
    result = self.rpc.call_node_leave_cluster(node.name)
2021
    msg = result.fail_msg
2022
    if msg:
2023
      self.LogWarning("Errors encountered on the remote node while leaving"
2024
                      " the cluster: %s", msg)
2025

    
2026
    # Promote nodes to master candidate as needed
2027
    _AdjustCandidatePool(self)
2028

    
2029

    
2030
class LUQueryNodes(NoHooksLU):
2031
  """Logical unit for querying nodes.
2032

2033
  """
2034
  _OP_REQP = ["output_fields", "names", "use_locking"]
2035
  REQ_BGL = False
2036
  _FIELDS_DYNAMIC = utils.FieldSet(
2037
    "dtotal", "dfree",
2038
    "mtotal", "mnode", "mfree",
2039
    "bootid",
2040
    "ctotal", "cnodes", "csockets",
2041
    )
2042

    
2043
  _FIELDS_STATIC = utils.FieldSet(
2044
    "name", "pinst_cnt", "sinst_cnt",
2045
    "pinst_list", "sinst_list",
2046
    "pip", "sip", "tags",
2047
    "serial_no",
2048
    "master_candidate",
2049
    "master",
2050
    "offline",
2051
    "drained",
2052
    "role",
2053
    )
2054

    
2055
  def ExpandNames(self):
2056
    _CheckOutputFields(static=self._FIELDS_STATIC,
2057
                       dynamic=self._FIELDS_DYNAMIC,
2058
                       selected=self.op.output_fields)
2059

    
2060
    self.needed_locks = {}
2061
    self.share_locks[locking.LEVEL_NODE] = 1
2062

    
2063
    if self.op.names:
2064
      self.wanted = _GetWantedNodes(self, self.op.names)
2065
    else:
2066
      self.wanted = locking.ALL_SET
2067

    
2068
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2069
    self.do_locking = self.do_node_query and self.op.use_locking
2070
    if self.do_locking:
2071
      # if we don't request only static fields, we need to lock the nodes
2072
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2073

    
2074

    
2075
  def CheckPrereq(self):
2076
    """Check prerequisites.
2077

2078
    """
2079
    # The validation of the node list is done in the _GetWantedNodes,
2080
    # if non empty, and if empty, there's no validation to do
2081
    pass
2082

    
2083
  def Exec(self, feedback_fn):
2084
    """Computes the list of nodes and their attributes.
2085

2086
    """
2087
    all_info = self.cfg.GetAllNodesInfo()
2088
    if self.do_locking:
2089
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2090
    elif self.wanted != locking.ALL_SET:
2091
      nodenames = self.wanted
2092
      missing = set(nodenames).difference(all_info.keys())
2093
      if missing:
2094
        raise errors.OpExecError(
2095
          "Some nodes were removed before retrieving their data: %s" % missing)
2096
    else:
2097
      nodenames = all_info.keys()
2098

    
2099
    nodenames = utils.NiceSort(nodenames)
2100
    nodelist = [all_info[name] for name in nodenames]
2101

    
2102
    # begin data gathering
2103

    
2104
    if self.do_node_query:
2105
      live_data = {}
2106
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2107
                                          self.cfg.GetHypervisorType())
2108
      for name in nodenames:
2109
        nodeinfo = node_data[name]
2110
        if not nodeinfo.fail_msg and nodeinfo.payload:
2111
          nodeinfo = nodeinfo.payload
2112
          fn = utils.TryConvert
2113
          live_data[name] = {
2114
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2115
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2116
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2117
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2118
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2119
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2120
            "bootid": nodeinfo.get('bootid', None),
2121
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2122
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2123
            }
2124
        else:
2125
          live_data[name] = {}
2126
    else:
2127
      live_data = dict.fromkeys(nodenames, {})
2128

    
2129
    node_to_primary = dict([(name, set()) for name in nodenames])
2130
    node_to_secondary = dict([(name, set()) for name in nodenames])
2131

    
2132
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2133
                             "sinst_cnt", "sinst_list"))
2134
    if inst_fields & frozenset(self.op.output_fields):
2135
      instancelist = self.cfg.GetInstanceList()
2136

    
2137
      for instance_name in instancelist:
2138
        inst = self.cfg.GetInstanceInfo(instance_name)
2139
        if inst.primary_node in node_to_primary:
2140
          node_to_primary[inst.primary_node].add(inst.name)
2141
        for secnode in inst.secondary_nodes:
2142
          if secnode in node_to_secondary:
2143
            node_to_secondary[secnode].add(inst.name)
2144

    
2145
    master_node = self.cfg.GetMasterNode()
2146

    
2147
    # end data gathering
2148

    
2149
    output = []
2150
    for node in nodelist:
2151
      node_output = []
2152
      for field in self.op.output_fields:
2153
        if field == "name":
2154
          val = node.name
2155
        elif field == "pinst_list":
2156
          val = list(node_to_primary[node.name])
2157
        elif field == "sinst_list":
2158
          val = list(node_to_secondary[node.name])
2159
        elif field == "pinst_cnt":
2160
          val = len(node_to_primary[node.name])
2161
        elif field == "sinst_cnt":
2162
          val = len(node_to_secondary[node.name])
2163
        elif field == "pip":
2164
          val = node.primary_ip
2165
        elif field == "sip":
2166
          val = node.secondary_ip
2167
        elif field == "tags":
2168
          val = list(node.GetTags())
2169
        elif field == "serial_no":
2170
          val = node.serial_no
2171
        elif field == "master_candidate":
2172
          val = node.master_candidate
2173
        elif field == "master":
2174
          val = node.name == master_node
2175
        elif field == "offline":
2176
          val = node.offline
2177
        elif field == "drained":
2178
          val = node.drained
2179
        elif self._FIELDS_DYNAMIC.Matches(field):
2180
          val = live_data[node.name].get(field, None)
2181
        elif field == "role":
2182
          if node.name == master_node:
2183
            val = "M"
2184
          elif node.master_candidate:
2185
            val = "C"
2186
          elif node.drained:
2187
            val = "D"
2188
          elif node.offline:
2189
            val = "O"
2190
          else:
2191
            val = "R"
2192
        else:
2193
          raise errors.ParameterError(field)
2194
        node_output.append(val)
2195
      output.append(node_output)
2196

    
2197
    return output
2198

    
2199

    
2200
class LUQueryNodeVolumes(NoHooksLU):
2201
  """Logical unit for getting volumes on node(s).
2202

2203
  """
2204
  _OP_REQP = ["nodes", "output_fields"]
2205
  REQ_BGL = False
2206
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2207
  _FIELDS_STATIC = utils.FieldSet("node")
2208

    
2209
  def ExpandNames(self):
2210
    _CheckOutputFields(static=self._FIELDS_STATIC,
2211
                       dynamic=self._FIELDS_DYNAMIC,
2212
                       selected=self.op.output_fields)
2213

    
2214
    self.needed_locks = {}
2215
    self.share_locks[locking.LEVEL_NODE] = 1
2216
    if not self.op.nodes:
2217
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2218
    else:
2219
      self.needed_locks[locking.LEVEL_NODE] = \
2220
        _GetWantedNodes(self, self.op.nodes)
2221

    
2222
  def CheckPrereq(self):
2223
    """Check prerequisites.
2224

2225
    This checks that the fields required are valid output fields.
2226

2227
    """
2228
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2229

    
2230
  def Exec(self, feedback_fn):
2231
    """Computes the list of nodes and their attributes.
2232

2233
    """
2234
    nodenames = self.nodes
2235
    volumes = self.rpc.call_node_volumes(nodenames)
2236

    
2237
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2238
             in self.cfg.GetInstanceList()]
2239

    
2240
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2241

    
2242
    output = []
2243
    for node in nodenames:
2244
      nresult = volumes[node]
2245
      if nresult.offline:
2246
        continue
2247
      msg = nresult.fail_msg
2248
      if msg:
2249
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2250
        continue
2251

    
2252
      node_vols = nresult.payload[:]
2253
      node_vols.sort(key=lambda vol: vol['dev'])
2254

    
2255
      for vol in node_vols:
2256
        node_output = []
2257
        for field in self.op.output_fields:
2258
          if field == "node":
2259
            val = node
2260
          elif field == "phys":
2261
            val = vol['dev']
2262
          elif field == "vg":
2263
            val = vol['vg']
2264
          elif field == "name":
2265
            val = vol['name']
2266
          elif field == "size":
2267
            val = int(float(vol['size']))
2268
          elif field == "instance":
2269
            for inst in ilist:
2270
              if node not in lv_by_node[inst]:
2271
                continue
2272
              if vol['name'] in lv_by_node[inst][node]:
2273
                val = inst.name
2274
                break
2275
            else:
2276
              val = '-'
2277
          else:
2278
            raise errors.ParameterError(field)
2279
          node_output.append(str(val))
2280

    
2281
        output.append(node_output)
2282

    
2283
    return output
2284

    
2285

    
2286
class LUAddNode(LogicalUnit):
2287
  """Logical unit for adding node to the cluster.
2288

2289
  """
2290
  HPATH = "node-add"
2291
  HTYPE = constants.HTYPE_NODE
2292
  _OP_REQP = ["node_name"]
2293

    
2294
  def BuildHooksEnv(self):
2295
    """Build hooks env.
2296

2297
    This will run on all nodes before, and on all nodes + the new node after.
2298

2299
    """
2300
    env = {
2301
      "OP_TARGET": self.op.node_name,
2302
      "NODE_NAME": self.op.node_name,
2303
      "NODE_PIP": self.op.primary_ip,
2304
      "NODE_SIP": self.op.secondary_ip,
2305
      }
2306
    nodes_0 = self.cfg.GetNodeList()
2307
    nodes_1 = nodes_0 + [self.op.node_name, ]
2308
    return env, nodes_0, nodes_1
2309

    
2310
  def CheckPrereq(self):
2311
    """Check prerequisites.
2312

2313
    This checks:
2314
     - the new node is not already in the config
2315
     - it is resolvable
2316
     - its parameters (single/dual homed) matches the cluster
2317

2318
    Any errors are signaled by raising errors.OpPrereqError.
2319

2320
    """
2321
    node_name = self.op.node_name
2322
    cfg = self.cfg
2323

    
2324
    dns_data = utils.HostInfo(node_name)
2325

    
2326
    node = dns_data.name
2327
    primary_ip = self.op.primary_ip = dns_data.ip
2328
    secondary_ip = getattr(self.op, "secondary_ip", None)
2329
    if secondary_ip is None:
2330
      secondary_ip = primary_ip
2331
    if not utils.IsValidIP(secondary_ip):
2332
      raise errors.OpPrereqError("Invalid secondary IP given")
2333
    self.op.secondary_ip = secondary_ip
2334

    
2335
    node_list = cfg.GetNodeList()
2336
    if not self.op.readd and node in node_list:
2337
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2338
                                 node)
2339
    elif self.op.readd and node not in node_list:
2340
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2341

    
2342
    for existing_node_name in node_list:
2343
      existing_node = cfg.GetNodeInfo(existing_node_name)
2344

    
2345
      if self.op.readd and node == existing_node_name:
2346
        if (existing_node.primary_ip != primary_ip or
2347
            existing_node.secondary_ip != secondary_ip):
2348
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2349
                                     " address configuration as before")
2350
        continue
2351

    
2352
      if (existing_node.primary_ip == primary_ip or
2353
          existing_node.secondary_ip == primary_ip or
2354
          existing_node.primary_ip == secondary_ip or
2355
          existing_node.secondary_ip == secondary_ip):
2356
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2357
                                   " existing node %s" % existing_node.name)
2358

    
2359
    # check that the type of the node (single versus dual homed) is the
2360
    # same as for the master
2361
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2362
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2363
    newbie_singlehomed = secondary_ip == primary_ip
2364
    if master_singlehomed != newbie_singlehomed:
2365
      if master_singlehomed:
2366
        raise errors.OpPrereqError("The master has no private ip but the"
2367
                                   " new node has one")
2368
      else:
2369
        raise errors.OpPrereqError("The master has a private ip but the"
2370
                                   " new node doesn't have one")
2371

    
2372
    # checks reachability
2373
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2374
      raise errors.OpPrereqError("Node not reachable by ping")
2375

    
2376
    if not newbie_singlehomed:
2377
      # check reachability from my secondary ip to newbie's secondary ip
2378
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2379
                           source=myself.secondary_ip):
2380
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2381
                                   " based ping to noded port")
2382

    
2383
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2384
    if self.op.readd:
2385
      exceptions = [node]
2386
    else:
2387
      exceptions = []
2388
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2389
    # the new node will increase mc_max with one, so:
2390
    mc_max = min(mc_max + 1, cp_size)
2391
    self.master_candidate = mc_now < mc_max
2392

    
2393
    if self.op.readd:
2394
      self.new_node = self.cfg.GetNodeInfo(node)
2395
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2396
    else:
2397
      self.new_node = objects.Node(name=node,
2398
                                   primary_ip=primary_ip,
2399
                                   secondary_ip=secondary_ip,
2400
                                   master_candidate=self.master_candidate,
2401
                                   offline=False, drained=False)
2402

    
2403
  def Exec(self, feedback_fn):
2404
    """Adds the new node to the cluster.
2405

2406
    """
2407
    new_node = self.new_node
2408
    node = new_node.name
2409

    
2410
    # for re-adds, reset the offline/drained/master-candidate flags;
2411
    # we need to reset here, otherwise offline would prevent RPC calls
2412
    # later in the procedure; this also means that if the re-add
2413
    # fails, we are left with a non-offlined, broken node
2414
    if self.op.readd:
2415
      new_node.drained = new_node.offline = False
2416
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2417
      # if we demote the node, we do cleanup later in the procedure
2418
      new_node.master_candidate = self.master_candidate
2419

    
2420
    # notify the user about any possible mc promotion
2421
    if new_node.master_candidate:
2422
      self.LogInfo("Node will be a master candidate")
2423

    
2424
    # check connectivity
2425
    result = self.rpc.call_version([node])[node]
2426
    result.Raise("Can't get version information from node %s" % node)
2427
    if constants.PROTOCOL_VERSION == result.payload:
2428
      logging.info("Communication to node %s fine, sw version %s match",
2429
                   node, result.payload)
2430
    else:
2431
      raise errors.OpExecError("Version mismatch master version %s,"
2432
                               " node version %s" %
2433
                               (constants.PROTOCOL_VERSION, result.payload))
2434

    
2435
    # setup ssh on node
2436
    logging.info("Copy ssh key to node %s", node)
2437
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2438
    keyarray = []
2439
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2440
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2441
                priv_key, pub_key]
2442

    
2443
    for i in keyfiles:
2444
      f = open(i, 'r')
2445
      try:
2446
        keyarray.append(f.read())
2447
      finally:
2448
        f.close()
2449

    
2450
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2451
                                    keyarray[2],
2452
                                    keyarray[3], keyarray[4], keyarray[5])
2453
    result.Raise("Cannot transfer ssh keys to the new node")
2454

    
2455
    # Add node to our /etc/hosts, and add key to known_hosts
2456
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2457
      utils.AddHostToEtcHosts(new_node.name)
2458

    
2459
    if new_node.secondary_ip != new_node.primary_ip:
2460
      result = self.rpc.call_node_has_ip_address(new_node.name,
2461
                                                 new_node.secondary_ip)
2462
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2463
                   prereq=True)
2464
      if not result.payload:
2465
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2466
                                 " you gave (%s). Please fix and re-run this"
2467
                                 " command." % new_node.secondary_ip)
2468

    
2469
    node_verify_list = [self.cfg.GetMasterNode()]
2470
    node_verify_param = {
2471
      'nodelist': [node],
2472
      # TODO: do a node-net-test as well?
2473
    }
2474

    
2475
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2476
                                       self.cfg.GetClusterName())
2477
    for verifier in node_verify_list:
2478
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2479
      nl_payload = result[verifier].payload['nodelist']
2480
      if nl_payload:
2481
        for failed in nl_payload:
2482
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2483
                      (verifier, nl_payload[failed]))
2484
        raise errors.OpExecError("ssh/hostname verification failed.")
2485

    
2486
    if self.op.readd:
2487
      _RedistributeAncillaryFiles(self)
2488
      self.context.ReaddNode(new_node)
2489
      # make sure we redistribute the config
2490
      self.cfg.Update(new_node)
2491
      # and make sure the new node will not have old files around
2492
      if not new_node.master_candidate:
2493
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2494
        msg = result.fail_msg
2495
        if msg:
2496
          self.LogWarning("Node failed to demote itself from master"
2497
                          " candidate status: %s" % msg)
2498
    else:
2499
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2500
      self.context.AddNode(new_node)
2501

    
2502

    
2503
class LUSetNodeParams(LogicalUnit):
2504
  """Modifies the parameters of a node.
2505

2506
  """
2507
  HPATH = "node-modify"
2508
  HTYPE = constants.HTYPE_NODE
2509
  _OP_REQP = ["node_name"]
2510
  REQ_BGL = False
2511

    
2512
  def CheckArguments(self):
2513
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2514
    if node_name is None:
2515
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2516
    self.op.node_name = node_name
2517
    _CheckBooleanOpField(self.op, 'master_candidate')
2518
    _CheckBooleanOpField(self.op, 'offline')
2519
    _CheckBooleanOpField(self.op, 'drained')
2520
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2521
    if all_mods.count(None) == 3:
2522
      raise errors.OpPrereqError("Please pass at least one modification")
2523
    if all_mods.count(True) > 1:
2524
      raise errors.OpPrereqError("Can't set the node into more than one"
2525
                                 " state at the same time")
2526

    
2527
  def ExpandNames(self):
2528
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2529

    
2530
  def BuildHooksEnv(self):
2531
    """Build hooks env.
2532

2533
    This runs on the master node.
2534

2535
    """
2536
    env = {
2537
      "OP_TARGET": self.op.node_name,
2538
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2539
      "OFFLINE": str(self.op.offline),
2540
      "DRAINED": str(self.op.drained),
2541
      }
2542
    nl = [self.cfg.GetMasterNode(),
2543
          self.op.node_name]
2544
    return env, nl, nl
2545

    
2546
  def CheckPrereq(self):
2547
    """Check prerequisites.
2548

2549
    This checks that the requested node state changes are consistent.
2550

2551
    """
2552
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
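    # demoting a node from master candidate, whether explicitly or as a
    # side effect of offlining/draining it, must leave enough master
    # candidates in the pool (unless the operation is forced)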
2553

    
2554
    if ((self.op.master_candidate == False or self.op.offline == True or
2555
         self.op.drained == True) and node.master_candidate):
2556
      # we will demote the node from master_candidate
2557
      if self.op.node_name == self.cfg.GetMasterNode():
2558
        raise errors.OpPrereqError("The master node has to be a"
2559
                                   " master candidate, online and not drained")
2560
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2561
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2562
      if num_candidates <= cp_size:
2563
        msg = ("Not enough master candidates (desired"
2564
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2565
        if self.op.force:
2566
          self.LogWarning(msg)
2567
        else:
2568
          raise errors.OpPrereqError(msg)
2569

    
2570
    if (self.op.master_candidate == True and
2571
        ((node.offline and not self.op.offline == False) or
2572
         (node.drained and not self.op.drained == False))):
2573
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2574
                                 " to master_candidate" % node.name)
2575

    
2576
    return
2577

    
2578
  def Exec(self, feedback_fn):
2579
    """Modifies a node.
2580

2581
    """
2582
    node = self.node
2583

    
2584
    result = []
2585
    changed_mc = False
2586

    
2587
    if self.op.offline is not None:
2588
      node.offline = self.op.offline
2589
      result.append(("offline", str(self.op.offline)))
2590
      if self.op.offline == True:
2591
        if node.master_candidate:
2592
          node.master_candidate = False
2593
          changed_mc = True
2594
          result.append(("master_candidate", "auto-demotion due to offline"))
2595
        if node.drained:
2596
          node.drained = False
2597
          result.append(("drained", "clear drained status due to offline"))
2598

    
2599
    if self.op.master_candidate is not None:
2600
      node.master_candidate = self.op.master_candidate
2601
      changed_mc = True
2602
      result.append(("master_candidate", str(self.op.master_candidate)))
2603
      if self.op.master_candidate == False:
2604
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2605
        msg = rrc.fail_msg
2606
        if msg:
2607
          self.LogWarning("Node failed to demote itself: %s" % msg)
2608

    
2609
    if self.op.drained is not None:
2610
      node.drained = self.op.drained
2611
      result.append(("drained", str(self.op.drained)))
2612
      if self.op.drained == True:
2613
        if node.master_candidate:
2614
          node.master_candidate = False
2615
          changed_mc = True
2616
          result.append(("master_candidate", "auto-demotion due to drain"))
2617
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2618
          msg = rrc.fail_msg
2619
          if msg:
2620
            self.LogWarning("Node failed to demote itself: %s" % msg)
2621
        if node.offline:
2622
          node.offline = False
2623
          result.append(("offline", "clear offline status due to drain"))
2624

    
2625
    # this will trigger configuration file update, if needed
2626
    self.cfg.Update(node)
2627
    # this will trigger job queue propagation or cleanup
2628
    if changed_mc:
2629
      self.context.ReaddNode(node)
2630

    
2631
    return result
2632

    
2633

    
2634
class LUPowercycleNode(NoHooksLU):
2635
  """Powercycles a node.
2636

2637
  """
2638
  _OP_REQP = ["node_name", "force"]
2639
  REQ_BGL = False
2640

    
2641
  def CheckArguments(self):
2642
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2643
    if node_name is None:
2644
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2645
    self.op.node_name = node_name
2646
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2647
      raise errors.OpPrereqError("The node is the master and the force"
2648
                                 " parameter was not set")
2649

    
2650
  def ExpandNames(self):
2651
    """Locking for PowercycleNode.
2652

2653
    This is a last-resort option and shouldn't block on other
2654
    jobs. Therefore, we grab no locks.
2655

2656
    """
2657
    self.needed_locks = {}
2658

    
2659
  def CheckPrereq(self):
2660
    """Check prerequisites.
2661

2662
    This LU has no prereqs.
2663

2664
    """
2665
    pass
2666

    
2667
  def Exec(self, feedback_fn):
2668
    """Reboots a node.
2669

2670
    """
2671
    result = self.rpc.call_node_powercycle(self.op.node_name,
2672
                                           self.cfg.GetHypervisorType())
2673
    result.Raise("Failed to schedule the reboot")
2674
    return result.payload
2675

    
2676

    
2677
class LUQueryClusterInfo(NoHooksLU):
2678
  """Query cluster configuration.
2679

2680
  """
2681
  _OP_REQP = []
2682
  REQ_BGL = False
2683

    
2684
  def ExpandNames(self):
2685
    self.needed_locks = {}
2686

    
2687
  def CheckPrereq(self):
2688
    """No prerequsites needed for this LU.
2689

2690
    """
2691
    pass
2692

    
2693
  def Exec(self, feedback_fn):
2694
    """Return cluster config.
2695

2696
    """
2697
    cluster = self.cfg.GetClusterInfo()
2698
    result = {
2699
      "software_version": constants.RELEASE_VERSION,
2700
      "protocol_version": constants.PROTOCOL_VERSION,
2701
      "config_version": constants.CONFIG_VERSION,
2702
      "os_api_version": max(constants.OS_API_VERSIONS),
2703
      "export_version": constants.EXPORT_VERSION,
2704
      "architecture": (platform.architecture()[0], platform.machine()),
2705
      "name": cluster.cluster_name,
2706
      "master": cluster.master_node,
2707
      "default_hypervisor": cluster.enabled_hypervisors[0],
2708
      "enabled_hypervisors": cluster.enabled_hypervisors,
2709
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
2710
                        for hypervisor_name in cluster.enabled_hypervisors]),
2711
      "beparams": cluster.beparams,
2712
      "nicparams": cluster.nicparams,
2713
      "candidate_pool_size": cluster.candidate_pool_size,
2714
      "master_netdev": cluster.master_netdev,
2715
      "volume_group_name": cluster.volume_group_name,
2716
      "file_storage_dir": cluster.file_storage_dir,
2717
      }
2718

    
2719
    return result
2720

    
2721

    
2722
class LUQueryConfigValues(NoHooksLU):
2723
  """Return configuration values.
2724

2725
  """
2726
  _OP_REQP = []
2727
  REQ_BGL = False
2728
  _FIELDS_DYNAMIC = utils.FieldSet()
2729
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2730

    
2731
  def ExpandNames(self):
2732
    self.needed_locks = {}
2733

    
2734
    _CheckOutputFields(static=self._FIELDS_STATIC,
2735
                       dynamic=self._FIELDS_DYNAMIC,
2736
                       selected=self.op.output_fields)
2737

    
2738
  def CheckPrereq(self):
2739
    """No prerequisites.
2740

2741
    """
2742
    pass
2743

    
2744
  def Exec(self, feedback_fn):
2745
    """Dump a representation of the cluster config to the standard output.
2746

2747
    """
2748
    values = []
2749
    for field in self.op.output_fields:
2750
      if field == "cluster_name":
2751
        entry = self.cfg.GetClusterName()
2752
      elif field == "master_node":
2753
        entry = self.cfg.GetMasterNode()
2754
      elif field == "drain_flag":
2755
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2756
      else:
2757
        raise errors.ParameterError(field)
2758
      values.append(entry)
2759
    return values
2760

    
2761

    
2762
class LUActivateInstanceDisks(NoHooksLU):
2763
  """Bring up an instance's disks.
2764

2765
  """
2766
  _OP_REQP = ["instance_name"]
2767
  REQ_BGL = False
2768

    
2769
  def ExpandNames(self):
2770
    self._ExpandAndLockInstance()
2771
    self.needed_locks[locking.LEVEL_NODE] = []
2772
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2773

    
2774
  def DeclareLocks(self, level):
2775
    if level == locking.LEVEL_NODE:
2776
      self._LockInstancesNodes()
2777

    
2778
  def CheckPrereq(self):
2779
    """Check prerequisites.
2780

2781
    This checks that the instance is in the cluster.
2782

2783
    """
2784
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2785
    assert self.instance is not None, \
2786
      "Cannot retrieve locked instance %s" % self.op.instance_name
2787
    _CheckNodeOnline(self, self.instance.primary_node)
2788

    
2789
  def Exec(self, feedback_fn):
2790
    """Activate the disks.
2791

2792
    """
2793
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2794
    if not disks_ok:
2795
      raise errors.OpExecError("Cannot activate block devices")
2796

    
2797
    return disks_info
2798

    
2799

    
2800
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2801
  """Prepare the block devices for an instance.
2802

2803
  This sets up the block devices on all nodes.
2804

2805
  @type lu: L{LogicalUnit}
2806
  @param lu: the logical unit on whose behalf we execute
2807
  @type instance: L{objects.Instance}
2808
  @param instance: the instance for whose disks we assemble
2809
  @type ignore_secondaries: boolean
2810
  @param ignore_secondaries: if true, errors on secondary nodes
2811
      won't result in an error return from the function
2812
  @return: False if the operation failed, otherwise a list of
2813
      (host, instance_visible_name, node_visible_name)
2814
      with the mapping from node devices to instance devices
2815

2816
  """
2817
  device_info = []
2818
  disks_ok = True
2819
  iname = instance.name
2820
  # With the two passes mechanism we try to reduce the window of
2821
  # opportunity for the race condition of switching DRBD to primary
2822
  # before handshaking occurred, but we do not eliminate it
2823

    
2824
  # The proper fix would be to wait (with some limits) until the
2825
  # connection has been made and drbd transitions from WFConnection
2826
  # into any other network-connected state (Connected, SyncTarget,
2827
  # SyncSource, etc.)
2828

    
2829
  # 1st pass, assemble on all nodes in secondary mode
2830
  for inst_disk in instance.disks:
2831
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2832
      lu.cfg.SetDiskID(node_disk, node)
2833
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2834
      msg = result.fail_msg
2835
      if msg:
2836
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2837
                           " (is_primary=False, pass=1): %s",
2838
                           inst_disk.iv_name, node, msg)
2839
        if not ignore_secondaries:
2840
          disks_ok = False
2841

    
2842
  # FIXME: race condition on drbd migration to primary
2843

    
2844
  # 2nd pass, do only the primary node
2845
  for inst_disk in instance.disks:
2846
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2847
      if node != instance.primary_node:
2848
        continue
2849
      lu.cfg.SetDiskID(node_disk, node)
2850
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2851
      msg = result.fail_msg
2852
      if msg:
2853
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2854
                           " (is_primary=True, pass=2): %s",
2855
                           inst_disk.iv_name, node, msg)
2856
        disks_ok = False
2857
    device_info.append((instance.primary_node, inst_disk.iv_name,
2858
                        result.payload))
2859

    
2860
  # leave the disks configured for the primary node
2861
  # this is a workaround that would be fixed better by
2862
  # improving the logical/physical id handling
2863
  for disk in instance.disks:
2864
    lu.cfg.SetDiskID(disk, instance.primary_node)
2865

    
2866
  return disks_ok, device_info
2867

    
2868

    
2869
def _StartInstanceDisks(lu, instance, force):
2870
  """Start the disks of an instance.
2871

2872
  """
2873
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
2874
                                           ignore_secondaries=force)
2875
  if not disks_ok:
2876
    _ShutdownInstanceDisks(lu, instance)
2877
    if force is not None and not force:
2878
      lu.proc.LogWarning("", hint="If the message above refers to a"
2879
                         " secondary node,"
2880
                         " you can retry the operation using '--force'.")
2881
    raise errors.OpExecError("Disk consistency error")
2882

    
2883

    
2884
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s" % pnode)

  if instance.name in ins_l.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored
  when computing the return value.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


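# Usage sketch (hypothetical, for illustration only): most callers go through
# the safe wrapper, while failover-style code that knowingly abandons the
# primary node calls the low-level helper directly:
#
#   _SafeShutdownInstanceDisks(lu, inst)                   # refuses if running
#   _ShutdownInstanceDisks(lu, inst, ignore_primary=True)  # e.g. failover
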
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem))
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem))


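# Usage sketch (hypothetical, for illustration only): the requested amount is
# normally taken from the instance's filled-in backend parameters, mirroring
# the startup/failover checks below:
#
#   bep = lu.cfg.GetClusterInfo().FillBE(inst)
#   _CheckNodeFreeMemory(lu, inst.primary_node,
#                        "starting instance %s" % inst.name,
#                        bep[constants.BE_MEMORY], inst.hypervisor)
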
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ))
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ))

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   self.op.pnode)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name), prereq=True)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "serial_no", "hypervisor", "hvparams",] +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.failed or result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "serial_no":
          val = instance.serial_no
        elif field == "network_port":
          val = instance.network_port
        elif field == "hypervisor":
          val = instance.hypervisor
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


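# Field-selection sketch (hypothetical, for illustration only): purely static
# fields ("name", "pnode", "be/memory", ...) are answered from the config
# alone, while any dynamic field ("status", "oper_state", "oper_ram") sets
# do_node_query and triggers the call_all_instances_info RPC, e.g.:
#
#   output_fields = ["name", "pnode", "status"]  # "status" forces a node query
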
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                               ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.op.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.LogWarning("Migration failed and I can't reconnect the"
                      " drives: error '%s'\n"
                      "Please look and recover the instance status" %
                      str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.op.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
    if self.op.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


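# Migration flow sketch (for illustration only, summarizing _ExecMigration
# and _ExecCleanup above): on a successful live migration the DRBD disks go
# through _EnsureSecondary(target) -> _GoStandalone() -> _GoReconnect(True)
# -> _WaitUntilSync() -> call_instance_migrate(...) -> _EnsureSecondary(old
# primary) -> _WaitUntilSync() -> _GoStandalone() -> _GoReconnect(False) ->
# _WaitUntilSync(); the cleanup path reuses the same demote/reconnect steps
# to recover a half-finished migration.
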
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


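# Note: for a single disk the device built above forms a small tree, roughly
# (LV names below are only illustrative, they come from _GenerateUniqueNames):
#
#   LD_DRBD8  logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#     |- LD_LV <uuid>.disk0_data  (size MB)
#     `- LD_LV <uuid>.disk0_meta  (128 MB, DRBD metadata)
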
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


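# Note: as an example of the layout built above, a two-disk DRBD8 instance
# with base_index=0 requests minors on [primary, secondary, primary,
# secondary], pairs the generated names as <uuid>.disk0_data/<uuid>.disk0_meta
# and <uuid>.disk1_data/<uuid>.disk1_meta, and exposes the disks to the
# instance as "disk/0" and "disk/1".
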
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


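# Note: for an instance named e.g. "instance1.example.com" (name used only
# for illustration) the helper above yields the LVM tag
# "originstname+instance1.example.com".
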
def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @raise errors.OpExecError: if the creation of any disk fails

  """
  info = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


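# Note: in the creation loop above f_create is True only on the primary node,
# so both force_create and force_open are True there; on all other nodes both
# are False and only devices that request creation on secondaries are built.
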
def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    msg = result.fail_msg
    if msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


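# Note: as an example of the computation above, two disks of 10240 MB and
# 2048 MB require 12288 MB of volume group space with DT_PLAIN and
# 12288 + 2 * 128 = 12544 MB with DT_DRBD8 (per-disk metadata overhead),
# while DT_DISKLESS and DT_FILE need no volume group space at all (None).
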
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


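# Note: _CheckHVParams above skips results flagged as offline, so hypervisor
# parameters are only validated on the remaining (reachable) nodes.
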
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

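  # Note: when an iallocator is used, ExpandNames above locks all nodes
  # (locking.ALL_SET) because the final primary/secondary are only known once
  # _RunAllocator below has run from CheckPrereq; with an explicit
  # pnode/snode only those nodes (plus a possible import source node) are
  # locked.
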
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


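# Note: the Exec path of LUCreateInstance above is, in order: create the
# disks, add the instance to the configuration, release the node locks that
# are no longer needed, wait for (or spot-check) disk sync, run the OS
# create or import scripts, and optionally start the instance; on disk
# creation failure the disks are removed and the DRBD minors released.
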
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


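# Note: LUConnectConsole above does not attach to the console itself; it
# returns the ssh command line (built with self.ssh.BuildCmd) that the client
# is expected to run on the master node.
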
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets.append(self.replacer)

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      self.tasklets.append(replacer)

    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


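# Note: LUEvacuateNode above is effectively a bulk secondary change: it
# queues one TLReplaceDisks tasklet in REPLACE_DISK_CHG mode for every
# instance that has the evacuated node as its secondary.
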
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Initializes this class.

    """
    # Parameters
    self.lu = lu
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    cnt = [remote_node, iallocator].count(None)
    if mode == constants.REPLACE_DISK_CHG:
      if cnt == 2:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")
      elif cnt == 0:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
    else: # not replacing the secondary
      if cnt != 2:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " be used only when changing the"
                                   " secondary node")

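  # Note: the argument combinations accepted by CheckArguments above are:
  #   REPLACE_DISK_CHG: exactly one of remote_node / iallocator must be given
  #   other modes:      neither remote_node nor iallocator may be given
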
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info))

    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if self.instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(self.instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(self.instance.secondary_nodes))

    secondary_node = self.instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       self.instance.name, secondary_node)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.mode == constants.REPLACE_DISK_PRI:
      self.target_node = self.instance.primary_node
      self.other_node = secondary_node
      check_nodes = [self.target_node, self.other_node]

    elif self.mode == constants.REPLACE_DISK_SEC:
      self.target_node = secondary_node
      self.other_node = self.instance.primary_node
      check_nodes = [self.target_node, self.other_node]

    elif self.mode == constants.REPLACE_DISK_CHG:
      self.new_node = remote_node
      self.other_node = self.instance.primary_node
      self.target_node = secondary_node
      check_nodes = [self.new_node, self.other_node]

      _CheckNodeNotDrained(self.lu, remote_node)

    else:
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                   self.mode)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    # Check whether disks are valid
    for disk_idx in self.disks:
      self.instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

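  # Note: after CheckPrereq above, target_node is the node whose storage is
  # being replaced (the primary for REPLACE_DISK_PRI, the secondary
  # otherwise), other_node is the healthy peer, and new_node is only set in
  # REPLACE_DISK_CHG mode.
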
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    feedback_fn("Replacing disks for %s" % self.instance.name)

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      if self.mode == constants.REPLACE_DISK_CHG:
        return self._ExecDrbd8Secondary()
      else:
        return self._ExecDrbd8DiskOnly()

    finally:
      # Deactivate the instance disks if we're replacing them on a down
      # instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
5537
    self.lu.LogInfo("Checking volume groups")
5538

    
5539
    vgname = self.cfg.GetVGName()
5540

    
5541
    # Make sure volume group exists on all involved nodes
5542
    results = self.rpc.call_vg_list(nodes)
5543
    if not results:
5544
      raise errors.OpExecError("Can't list volume groups on the nodes")
5545

    
5546
    for node in nodes:
5547
      res = results[node]
5548
      res.Raise("Error checking node %s" % node)
5549
      if vgname not in res.payload:
5550
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
5551
                                 (vgname, node))
5552

    
5553
  def _CheckDisksExistence(self, nodes):
5554
    # Check disk existence
5555
    for idx, dev in enumerate(self.instance.disks):
5556
      if idx not in self.disks:
5557
        continue
5558

    
5559
      for node in nodes:
5560
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
5561
        self.cfg.SetDiskID(dev, node)
5562

    
5563
        result = self.rpc.call_blockdev_find(node, dev)
5564

    
5565
        msg = result.fail_msg
5566
        if msg or not result.payload:
5567
          if not msg:
5568
            msg = "disk not found"
5569
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5570
                                   (idx, node, msg))
5571

    
5572
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
5573
    for idx, dev in enumerate(self.instance.disks):
5574
      if idx not in self.disks:
5575
        continue
5576

    
5577
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
5578
                      (idx, node_name))
5579

    
5580
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
5581
                                   ldisk=ldisk):
5582
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
5583
                                 " replace disks for instance %s" %
5584
                                 (node_name, self.instance.name))
5585

    
5586
  def _CreateNewStorage(self, node_name):
5587
    vgname = self.cfg.GetVGName()
5588
    iv_names = {}
5589

    
5590
    for idx, dev in enumerate(self.instance.disks):
5591
      if idx not in self.disks:
5592
        continue
5593

    
5594
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
5595

    
5596
      self.cfg.SetDiskID(dev, node_name)
5597

    
5598
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
5599
      names = _GenerateUniqueNames(self.lu, lv_names)
5600

    
5601
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
5602
                             logical_id=(vgname, names[0]))
5603
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5604
                             logical_id=(vgname, names[1]))
5605

    
5606
      new_lvs = [lv_data, lv_meta]
5607
      old_lvs = dev.children
5608
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5609

    
5610
      # we pass force_create=True to force the LVM creation
5611
      for new_lv in new_lvs:
5612
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
5613
                        _GetInstanceInfoText(self.instance), False)
5614

    
5615
    return iv_names
5616

    
5617
  def _CheckDevices(self, node_name, iv_names):
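    """Check that the devices in iv_names can be found and are not degraded.

    """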
5618
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5619
      self.cfg.SetDiskID(dev, node_name)
5620

    
5621
      result = self.rpc.call_blockdev_find(node_name, dev)
5622

    
5623
      msg = result.fail_msg
5624
      if msg or not result.payload:
5625
        if not msg:
5626
          msg = "disk not found"
5627
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5628
                                 (name, msg))
5629

    
5630
      if result.payload[5]:
5631
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5632

    
5633
  def _RemoveOldStorage(self, node_name, iv_names):
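    """Remove the old LVs recorded in iv_names from the given node.

    """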
5634
    for name, (dev, old_lvs, _) in iv_names.iteritems():
5635
      self.lu.LogInfo("Remove logical volumes for %s" % name)
5636

    
5637
      for lv in old_lvs:
5638
        self.cfg.SetDiskID(lv, node_name)
5639

    
5640
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
5641
        if msg:
5642
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
5643
                             hint="remove unused LVs manually")
5644

    
5645
  def _ExecDrbd8DiskOnly(self):
5646
    """Replace a disk on the primary or secondary for DRBD 8.
5647

5648
    The algorithm for replace is quite complicated:
5649

5650
      1. for each disk to be replaced:
5651

5652
        1. create new LVs on the target node with unique names
5653
        1. detach old LVs from the drbd device
5654
        1. rename old LVs to name_replaced.<time_t>
5655
        1. rename new LVs to old LVs
5656
        1. attach the new LVs (with the old names now) to the drbd device
5657

5658
      1. wait for sync across all devices
5659

5660
      1. for each modified disk:
5661

5662
        1. remove old LVs (which have the name name_replaced.<time_t>)
5663

5664
    Failures are not very well handled.
5665

5666
    """
5667
    steps_total = 6
5668

    
5669
    # Step: check device existence
5670
    self.lu.LogStep(1, steps_total, "Check device existence")
5671
    self._CheckDisksExistence([self.other_node, self.target_node])
5672
    self._CheckVolumeGroup([self.target_node, self.other_node])
5673

    
5674
    # Step: check other node consistency
5675
    self.lu.LogStep(2, steps_total, "Check peer consistency")
5676
    self._CheckDisksConsistency(self.other_node,
5677
                                self.other_node == self.instance.primary_node,
5678
                                False)
5679

    
5680
    # Step: create new storage
5681
    self.lu.LogStep(3, steps_total, "Allocate new storage")
5682
    iv_names = self._CreateNewStorage(self.target_node)
5683

    
5684
    # Step: for each lv, detach+rename*2+attach
5685
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
5686
    for dev, old_lvs, new_lvs in iv_names.itervalues():
5687
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
5688

    
5689
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
5690
      result.Raise("Can't detach drbd from local storage on node"
5691
                   " %s for device %s" % (self.target_node, dev.iv_name))
5692
      #dev.children = []
5693
      #cfg.Update(instance)
5694

    
5695
      # ok, we created the new LVs, so now we know we have the needed
5696
      # storage; as such, we proceed on the target node to rename
5697
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5698
      # using the assumption that logical_id == physical_id (which in
5699
      # turn is the unique_id on that node)
5700

    
5701
      # FIXME(iustin): use a better name for the replaced LVs
5702
      temp_suffix = int(time.time())
5703
      ren_fn = lambda d, suff: (d.physical_id[0],
5704
                                d.physical_id[1] + "_replaced-%s" % suff)
5705

    
5706
      # Build the rename list based on what LVs exist on the node
5707
      rename_old_to_new = []
5708
      for to_ren in old_lvs:
5709
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
5710
        if not result.fail_msg and result.payload:
5711
          # device exists
5712
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
5713

    
5714
      self.lu.LogInfo("Renaming the old LVs on the target node")
5715
      result = self.rpc.call_blockdev_rename(self.target_node, rename_old_to_new)
5716
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
5717

    
5718
      # Now we rename the new LVs to the old LVs
5719
      self.lu.LogInfo("Renaming the new LVs on the target node")
5720
      rename_new_to_old = [(new, old.physical_id)
5721
                           for old, new in zip(old_lvs, new_lvs)]
5722
      result = self.rpc.call_blockdev_rename(self.target_node, rename_new_to_old)
5723
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
5724

    
5725
      for old, new in zip(old_lvs, new_lvs):
5726
        new.logical_id = old.logical_id
5727
        self.cfg.SetDiskID(new, self.target_node)
5728

    
5729
      for disk in old_lvs:
5730
        disk.logical_id = ren_fn(disk, temp_suffix)
5731
        self.cfg.SetDiskID(disk, self.target_node)
5732

    
5733
      # Now that the new lvs have the old name, we can add them to the device
5734
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
5735
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev, new_lvs)
5736
      msg = result.fail_msg
5737
      if msg:
5738
        for new_lv in new_lvs:
5739
          msg2 = self.rpc.call_blockdev_remove(self.target_node, new_lv).fail_msg
5740
          if msg2:
5741
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
5742
                               hint=("cleanup manually the unused logical"
5743
                                     "volumes"))
5744
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
5745

    
5746
      dev.children = new_lvs
5747

    
5748
      self.cfg.Update(self.instance)
5749

    
5750
    # Wait for sync
5751
    # This can fail as the old devices are degraded and _WaitForSync
5752
    # does a combined result over all disks, so we don't check its return value
5753
    self.lu.LogStep(5, steps_total, "Sync devices")
5754
    _WaitForSync(self.lu, self.instance, unlock=True)
5755

    
5756
    # Check all devices manually
5757
    self._CheckDevices(self.instance.primary_node, iv_names)
5758

    
5759
    # Step: remove old storage
5760
    self.lu.LogStep(6, steps_total, "Removing old storage")
5761
    self._RemoveOldStorage(self.target_node, iv_names)
5762

    
5763
  def _ExecDrbd8Secondary(self):
5764
    """Replace the secondary node for DRBD 8.
5765

5766
    The algorithm for replace is quite complicated:
5767
      - for all disks of the instance:
5768
        - create new LVs on the new node with same names
5769
        - shutdown the drbd device on the old secondary
5770
        - disconnect the drbd network on the primary
5771
        - create the drbd device on the new secondary
5772
        - network attach the drbd on the primary, using an artifice:
5773
          the drbd code for Attach() will connect to the network if it
5774
          finds a device which is connected to the good local disks but
5775
          not network enabled
5776
      - wait for sync across all devices
5777
      - remove all disks from the old secondary
5778

5779
    Failures are not very well handled.
5780

5781
    """
5782
    steps_total = 6
5783

    
5784
    # Step: check device existence
5785
    self.lu.LogStep(1, steps_total, "Check device existence")
5786
    self._CheckDisksExistence([self.instance.primary_node])
5787
    self._CheckVolumeGroup([self.instance.primary_node])
5788

    
5789
    # Step: check other node consistency
5790
    self.lu.LogStep(2, steps_total, "Check peer consistency")
5791
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
5792

    
5793
    # Step: create new storage
5794
    self.lu.LogStep(3, steps_total, "Allocate new storage")
5795
    for idx, dev in enumerate(self.instance.disks):
5796
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
5797
                      (self.new_node, idx))
5798
      # we pass force_create=True to force LVM creation
5799
      for new_lv in dev.children:
5800
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
5801
                        _GetInstanceInfoText(self.instance), False)
5802

    
5803
    # Step 4: drbd minors and drbd setup changes
5804
    # after this, we must manually remove the drbd minors on both the
5805
    # error and the success paths
5806
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
5807
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
5809
    logging.debug("Allocated minors %r" % (minors,))
5810

    
5811
    iv_names = {}
5812
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
5813
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
5814
      # create new devices on new_node; note that we create two IDs:
5815
      # one without port, so the drbd will be activated without
5816
      # networking information on the new node at this stage, and one
5817
      # with network, for the latter activation in step 4
5818
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
5819
      if self.instance.primary_node == o_node1:
5820
        p_minor = o_minor1
5821
      else:
5822
        p_minor = o_minor2
5823

    
5824
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
5826

    
5827
      iv_names[idx] = (dev, dev.children, new_net_id)
5828
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
5829
                    new_net_id)
5830
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
5831
                              logical_id=new_alone_id,
5832
                              children=dev.children,
5833
                              size=dev.size)
5834
      try:
5835
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
5836
                              _GetInstanceInfoText(self.instance), False)
5837
      except errors.GenericError:
5838
        self.cfg.ReleaseDRBDMinors(self.instance.name)
5839
        raise
5840

    
5841
    # We have new devices, shutdown the drbd on the old secondary
5842
    for idx, dev in enumerate(self.instance.disks):
5843
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
5844
      self.cfg.SetDiskID(dev, self.target_node)
5845
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
5846
      if msg:
5847
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
5848
                           "node: %s" % (idx, msg),
5849
                           hint=("Please cleanup this device manually as"
5850
                                 " soon as possible"))
5851

    
5852
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
5853
    result = self.rpc.call_drbd_disconnect_net(
      [self.instance.primary_node], self.node_secondary_ip,
      self.instance.disks)[self.instance.primary_node]
5855

    
5856
    msg = result.fail_msg
5857
    if msg:
5858
      # detaches didn't succeed (unlikely)
5859
      self.cfg.ReleaseDRBDMinors(self.instance.name)
5860
      raise errors.OpExecError("Can't detach the disks from the network on"
5861
                               " old node: %s" % (msg,))
5862

    
5863
    # if we managed to detach at least one, we update all the disks of
5864
    # the instance to point to the new secondary
5865
    self.lu.LogInfo("Updating instance configuration")
5866
    for dev, _, new_logical_id in iv_names.itervalues():
5867
      dev.logical_id = new_logical_id
5868
      self.cfg.SetDiskID(dev, self.instance.primary_node)
5869

    
5870
    self.cfg.Update(self.instance)
5871

    
5872
    # and now perform the drbd attach
5873
    self.lu.LogInfo("Attaching primary drbds to new secondary"
5874
                    " (standalone => connected)")
5875
    result = self.rpc.call_drbd_attach_net(
      [self.instance.primary_node, self.new_node], self.node_secondary_ip,
      self.instance.disks, self.instance.name, False)
5878
    for to_node, to_result in result.items():
5879
      msg = to_result.fail_msg
5880
      if msg:
5881
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
5882
                           hint=("please do a gnt-instance info to see the"
5883
                                 " status of disks"))
5884

    
5885
    # Wait for sync
5886
    # This can fail as the old devices are degraded and _WaitForSync
5887
    # does a combined result over all disks, so we don't check its return value
5888
    self.lu.LogStep(5, steps_total, "Sync devices")
5889
    _WaitForSync(self.lu, self.instance, unlock=True)
5890

    
5891
    # Check all devices manually
5892
    self._CheckDevices(self.instance.primary_node, iv_names)
5893

    
5894
    # Step: remove old storage
5895
    self.lu.LogStep(6, steps_total, "Removing old storage")
5896
    self._RemoveOldStorage(self.target_node, iv_names)
5897

    
5898

    
5899
class LUGrowDisk(LogicalUnit):
5900
  """Grow a disk of an instance.
5901

5902
  """
5903
  HPATH = "disk-grow"
5904
  HTYPE = constants.HTYPE_INSTANCE
5905
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
5906
  REQ_BGL = False
5907

    
5908
  def ExpandNames(self):
5909
    self._ExpandAndLockInstance()
5910
    self.needed_locks[locking.LEVEL_NODE] = []
5911
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5912

    
5913
  def DeclareLocks(self, level):
5914
    if level == locking.LEVEL_NODE:
5915
      self._LockInstancesNodes()
5916

    
5917
  def BuildHooksEnv(self):
5918
    """Build hooks env.
5919

5920
    This runs on the master, the primary and all the secondaries.
5921

5922
    """
5923
    env = {
5924
      "DISK": self.op.disk,
5925
      "AMOUNT": self.op.amount,
5926
      }
5927
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5928
    nl = [
5929
      self.cfg.GetMasterNode(),
5930
      self.instance.primary_node,
5931
      ]
5932
    return env, nl, nl
5933

    
5934
  def CheckPrereq(self):
5935
    """Check prerequisites.
5936

5937
    This checks that the instance is in the cluster.
5938

5939
    """
5940
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5941
    assert instance is not None, \
5942
      "Cannot retrieve locked instance %s" % self.op.instance_name
5943
    nodenames = list(instance.all_nodes)
5944
    for node in nodenames:
5945
      _CheckNodeOnline(self, node)
5946

    
5947

    
5948
    self.instance = instance
5949

    
5950
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
5951
      raise errors.OpPrereqError("Instance's disk layout does not support"
5952
                                 " growing.")
5953

    
5954
    self.disk = instance.FindDisk(self.op.disk)
5955

    
5956
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5957
                                       instance.hypervisor)
5958
    for node in nodenames:
5959
      info = nodeinfo[node]
5960
      info.Raise("Cannot get current information from node %s" % node)
5961
      vg_free = info.payload.get('vg_free', None)
5962
      if not isinstance(vg_free, int):
5963
        raise errors.OpPrereqError("Can't compute free disk space on"
5964
                                   " node %s" % node)
5965
      if self.op.amount > vg_free:
5966
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
5967
                                   " %d MiB available, %d MiB required" %
5968
                                   (node, vg_free, self.op.amount))
5969

    
5970
  def Exec(self, feedback_fn):
5971
    """Execute disk grow.
5972

5973
    """
5974
    instance = self.instance
5975
    disk = self.disk
5976
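    # grow the device on every node the instance uses (for DRBD this
    # means both the primary and the secondary node)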
    for node in instance.all_nodes:
5977
      self.cfg.SetDiskID(disk, node)
5978
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
5979
      result.Raise("Grow request failed to node %s" % node)
5980
    disk.RecordGrow(self.op.amount)
5981
    self.cfg.Update(instance)
5982
    if self.op.wait_for_sync:
5983
      disk_abort = not _WaitForSync(self, instance)
5984
      if disk_abort:
5985
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
5986
                             " status.\nPlease check the instance.")
5987

    
5988

    
5989
class LUQueryInstanceData(NoHooksLU):
5990
  """Query runtime instance data.
5991

5992
  """
5993
  _OP_REQP = ["instances", "static"]
5994
  REQ_BGL = False
5995

    
5996
  def ExpandNames(self):
5997
    self.needed_locks = {}
5998
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
5999

    
6000
    if not isinstance(self.op.instances, list):
6001
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6002

    
6003
    if self.op.instances:
6004
      self.wanted_names = []
6005
      for name in self.op.instances:
6006
        full_name = self.cfg.ExpandInstanceName(name)
6007
        if full_name is None:
6008
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6009
        self.wanted_names.append(full_name)
6010
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6011
    else:
6012
      self.wanted_names = None
6013
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6014

    
6015
    self.needed_locks[locking.LEVEL_NODE] = []
6016
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6017

    
6018
  def DeclareLocks(self, level):
6019
    if level == locking.LEVEL_NODE:
6020
      self._LockInstancesNodes()
6021

    
6022
  def CheckPrereq(self):
6023
    """Check prerequisites.
6024

6025
    This only checks the optional instance list against the existing names.
6026

6027
    """
6028
    if self.wanted_names is None:
6029
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6030

    
6031
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6032
                             in self.wanted_names]
6033
    return
6034

    
6035
  def _ComputeDiskStatus(self, instance, snode, dev):
6036
    """Compute block device status.
6037

6038
    """
6039
    static = self.op.static
6040
    if not static:
6041
      self.cfg.SetDiskID(dev, instance.primary_node)
6042
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
6043
      if dev_pstatus.offline:
6044
        dev_pstatus = None
6045
      else:
6046
        dev_pstatus.Raise("Can't compute disk status for %s" % instance.name)
6047
        dev_pstatus = dev_pstatus.payload
6048
    else:
6049
      dev_pstatus = None
6050

    
6051
    if dev.dev_type in constants.LDS_DRBD:
6052
      # we change the snode then (otherwise we use the one passed in)
6053
      if dev.logical_id[0] == instance.primary_node:
6054
        snode = dev.logical_id[1]
6055
      else:
6056
        snode = dev.logical_id[0]
6057

    
6058
    if snode and not static:
6059
      self.cfg.SetDiskID(dev, snode)
6060
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
6061
      if dev_sstatus.offline:
6062
        dev_sstatus = None
6063
      else:
6064
        dev_sstatus.Raise("Can't compute disk status for %s" % instance.name)
6065
        dev_sstatus = dev_sstatus.payload
6066
    else:
6067
      dev_sstatus = None
6068

    
6069
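    # recurse into child devices (e.g. the LVs backing a DRBD disk)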
    if dev.children:
6070
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
6071
                      for child in dev.children]
6072
    else:
6073
      dev_children = []
6074

    
6075
    data = {
6076
      "iv_name": dev.iv_name,
6077
      "dev_type": dev.dev_type,
6078
      "logical_id": dev.logical_id,
6079
      "physical_id": dev.physical_id,
6080
      "pstatus": dev_pstatus,
6081
      "sstatus": dev_sstatus,
6082
      "children": dev_children,
6083
      "mode": dev.mode,
6084
      "size": dev.size,
6085
      }
6086

    
6087
    return data
6088

    
6089
  def Exec(self, feedback_fn):
6090
    """Gather and return data"""
6091
    result = {}
6092

    
6093
    cluster = self.cfg.GetClusterInfo()
6094

    
6095
    for instance in self.wanted_instances:
6096
      if not self.op.static:
6097
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6098
                                                  instance.name,
6099
                                                  instance.hypervisor)
6100
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6101
        remote_info = remote_info.payload
6102
        if remote_info and "state" in remote_info:
6103
          remote_state = "up"
6104
        else:
6105
          remote_state = "down"
6106
      else:
6107
        remote_state = None
6108
      if instance.admin_up:
6109
        config_state = "up"
6110
      else:
6111
        config_state = "down"
6112

    
6113
      disks = [self._ComputeDiskStatus(instance, None, device)
6114
               for device in instance.disks]
6115

    
6116
      idict = {
6117
        "name": instance.name,
6118
        "config_state": config_state,
6119
        "run_state": remote_state,
6120
        "pnode": instance.primary_node,
6121
        "snodes": instance.secondary_nodes,
6122
        "os": instance.os,
6123
        # this happens to be the same format used for hooks
6124
        "nics": _NICListToTuple(self, instance.nics),
6125
        "disks": disks,
6126
        "hypervisor": instance.hypervisor,
6127
        "network_port": instance.network_port,
6128
        "hv_instance": instance.hvparams,
6129
        "hv_actual": cluster.FillHV(instance),
6130
        "be_instance": instance.beparams,
6131
        "be_actual": cluster.FillBE(instance),
6132
        }
6133

    
6134
      result[instance.name] = idict
6135

    
6136
    return result
6137

    
6138

    
6139
class LUSetInstanceParams(LogicalUnit):
6140
  """Modifies an instances's parameters.
6141

6142
  """
6143
  HPATH = "instance-modify"
6144
  HTYPE = constants.HTYPE_INSTANCE
6145
  _OP_REQP = ["instance_name"]
6146
  REQ_BGL = False
6147

    
6148
  def CheckArguments(self):
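    """Check and normalize the opcode arguments.

    This fills in missing op attributes and validates the requested
    disk and NIC modifications.

    """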
6149
    if not hasattr(self.op, 'nics'):
6150
      self.op.nics = []
6151
    if not hasattr(self.op, 'disks'):
6152
      self.op.disks = []
6153
    if not hasattr(self.op, 'beparams'):
6154
      self.op.beparams = {}
6155
    if not hasattr(self.op, 'hvparams'):
6156
      self.op.hvparams = {}
6157
    self.op.force = getattr(self.op, "force", False)
6158
    if not (self.op.nics or self.op.disks or
6159
            self.op.hvparams or self.op.beparams):
6160
      raise errors.OpPrereqError("No changes submitted")
6161

    
6162
    # Disk validation
6163
    disk_addremove = 0
6164
    for disk_op, disk_dict in self.op.disks:
6165
      if disk_op == constants.DDM_REMOVE:
6166
        disk_addremove += 1
6167
        continue
6168
      elif disk_op == constants.DDM_ADD:
6169
        disk_addremove += 1
6170
      else:
6171
        if not isinstance(disk_op, int):
6172
          raise errors.OpPrereqError("Invalid disk index")
6173
        if not isinstance(disk_dict, dict):
6174
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
6175
          raise errors.OpPrereqError(msg)
6176

    
6177
      if disk_op == constants.DDM_ADD:
6178
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
6179
        if mode not in constants.DISK_ACCESS_SET:
6180
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
6181
        size = disk_dict.get('size', None)
6182
        if size is None:
6183
          raise errors.OpPrereqError("Required disk parameter size missing")
6184
        try:
6185
          size = int(size)
6186
        except ValueError, err:
6187
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
6188
                                     str(err))
6189
        disk_dict['size'] = size
6190
      else:
6191
        # modification of disk
6192
        if 'size' in disk_dict:
6193
          raise errors.OpPrereqError("Disk size change not possible, use"
6194
                                     " grow-disk")
6195

    
6196
    if disk_addremove > 1:
6197
      raise errors.OpPrereqError("Only one disk add or remove operation"
6198
                                 " supported at a time")
6199

    
6200
    # NIC validation
6201
    nic_addremove = 0
6202
    for nic_op, nic_dict in self.op.nics:
6203
      if nic_op == constants.DDM_REMOVE:
6204
        nic_addremove += 1
6205
        continue
6206
      elif nic_op == constants.DDM_ADD:
6207
        nic_addremove += 1
6208
      else:
6209
        if not isinstance(nic_op, int):
6210
          raise errors.OpPrereqError("Invalid nic index")
6211
        if not isinstance(nic_dict, dict):
6212
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
6213
          raise errors.OpPrereqError(msg)
6214

    
6215
      # nic_dict should be a dict
6216
      nic_ip = nic_dict.get('ip', None)
6217
      if nic_ip is not None:
6218
        if nic_ip.lower() == constants.VALUE_NONE:
6219
          nic_dict['ip'] = None
6220
        else:
6221
          if not utils.IsValidIP(nic_ip):
6222
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
6223

    
6224
      nic_bridge = nic_dict.get('bridge', None)
6225
      nic_link = nic_dict.get('link', None)
6226
      if nic_bridge and nic_link:
6227
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6228
                                   " at the same time")
6229
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
6230
        nic_dict['bridge'] = None
6231
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
6232
        nic_dict['link'] = None
6233

    
6234
      if nic_op == constants.DDM_ADD:
6235
        nic_mac = nic_dict.get('mac', None)
6236
        if nic_mac is None:
6237
          nic_dict['mac'] = constants.VALUE_AUTO
6238

    
6239
      if 'mac' in nic_dict:
6240
        nic_mac = nic_dict['mac']
6241
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6242
          if not utils.IsValidMac(nic_mac):
6243
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
6244
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
6245
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
6246
                                     " modifying an existing nic")
6247

    
6248
    if nic_addremove > 1:
6249
      raise errors.OpPrereqError("Only one NIC add or remove operation"
6250
                                 " supported at a time")
6251

    
6252
  def ExpandNames(self):
6253
    self._ExpandAndLockInstance()
6254
    self.needed_locks[locking.LEVEL_NODE] = []
6255
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6256

    
6257
  def DeclareLocks(self, level):
6258
    if level == locking.LEVEL_NODE:
6259
      self._LockInstancesNodes()
6260

    
6261
  def BuildHooksEnv(self):
6262
    """Build hooks env.
6263

6264
    This runs on the master, primary and secondaries.
6265

6266
    """
6267
    args = dict()
6268
    if constants.BE_MEMORY in self.be_new:
6269
      args['memory'] = self.be_new[constants.BE_MEMORY]
6270
    if constants.BE_VCPUS in self.be_new:
6271
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
6272
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
6273
    # information at all.
6274
    if self.op.nics:
6275
      args['nics'] = []
6276
      nic_override = dict(self.op.nics)
6277
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
6278
      for idx, nic in enumerate(self.instance.nics):
6279
        if idx in nic_override:
6280
          this_nic_override = nic_override[idx]
6281
        else:
6282
          this_nic_override = {}
6283
        if 'ip' in this_nic_override:
6284
          ip = this_nic_override['ip']
6285
        else:
6286
          ip = nic.ip
6287
        if 'mac' in this_nic_override:
6288
          mac = this_nic_override['mac']
6289
        else:
6290
          mac = nic.mac
6291
        if idx in self.nic_pnew:
6292
          nicparams = self.nic_pnew[idx]
6293
        else:
6294
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6295
        mode = nicparams[constants.NIC_MODE]
6296
        link = nicparams[constants.NIC_LINK]
6297
        args['nics'].append((ip, mac, mode, link))
6298
      if constants.DDM_ADD in nic_override:
6299
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6300
        mac = nic_override[constants.DDM_ADD]['mac']
6301
        nicparams = self.nic_pnew[constants.DDM_ADD]
6302
        mode = nicparams[constants.NIC_MODE]
6303
        link = nicparams[constants.NIC_LINK]
6304
        args['nics'].append((ip, mac, mode, link))
6305
      elif constants.DDM_REMOVE in nic_override:
6306
        del args['nics'][-1]
6307

    
6308
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6309
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6310
    return env, nl, nl
6311

    
6312
  def _GetUpdatedParams(self, old_params, update_dict,
6313
                        default_values, parameter_types):
6314
    """Return the new params dict for the given params.
6315

6316
    @type old_params: dict
6317
    @param old_params: old parameters
6318
    @type update_dict: dict
6319
    @param update_dict: dict containing new parameter values,
6320
                        or constants.VALUE_DEFAULT to reset the
6321
                        parameter to its default value
6322
    @type default_values: dict
6323
    @param default_values: default values for the filled parameters
6324
    @type parameter_types: dict
6325
    @param parameter_types: dict mapping target dict keys to types
6326
                            in constants.ENFORCEABLE_TYPES
6327
    @rtype: (dict, dict)
6328
    @return: (new_parameters, filled_parameters)
6329

6330
    """
6331
    params_copy = copy.deepcopy(old_params)
6332
    for key, val in update_dict.iteritems():
6333
      if val == constants.VALUE_DEFAULT:
6334
        try:
6335
          del params_copy[key]
6336
        except KeyError:
6337
          pass
6338
      else:
6339
        params_copy[key] = val
6340
    utils.ForceDictType(params_copy, parameter_types)
6341
    params_filled = objects.FillDict(default_values, params_copy)
6342
    return (params_copy, params_filled)
6343

    
6344
  def CheckPrereq(self):
6345
    """Check prerequisites.
6346

6347
    This only checks the instance list against the existing names.
6348

6349
    """
6350
    self.force = self.op.force
6351

    
6352
    # checking the new params on the primary/secondary nodes
6353

    
6354
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6355
    cluster = self.cluster = self.cfg.GetClusterInfo()
6356
    assert self.instance is not None, \
6357
      "Cannot retrieve locked instance %s" % self.op.instance_name
6358
    pnode = instance.primary_node
6359
    nodelist = list(instance.all_nodes)
6360

    
6361
    # hvparams processing
6362
    if self.op.hvparams:
6363
      i_hvdict, hv_new = self._GetUpdatedParams(
6364
                             instance.hvparams, self.op.hvparams,
6365
                             cluster.hvparams[instance.hypervisor],
6366
                             constants.HVS_PARAMETER_TYPES)
6367
      # local check
6368
      hypervisor.GetHypervisor(
6369
        instance.hypervisor).CheckParameterSyntax(hv_new)
6370
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6371
      self.hv_new = hv_new # the new actual values
6372
      self.hv_inst = i_hvdict # the new dict (without defaults)
6373
    else:
6374
      self.hv_new = self.hv_inst = {}
6375

    
6376
    # beparams processing
6377
    if self.op.beparams:
6378
      i_bedict, be_new = self._GetUpdatedParams(
6379
                             instance.beparams, self.op.beparams,
6380
                             cluster.beparams[constants.PP_DEFAULT],
6381
                             constants.BES_PARAMETER_TYPES)
6382
      self.be_new = be_new # the new actual values
6383
      self.be_inst = i_bedict # the new dict (without defaults)
6384
    else:
6385
      self.be_new = self.be_inst = {}
6386

    
6387
    self.warn = []
6388

    
6389
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6390
      mem_check_list = [pnode]
6391
      if be_new[constants.BE_AUTO_BALANCE]:
6392
        # either we changed auto_balance to yes or it was from before
6393
        mem_check_list.extend(instance.secondary_nodes)
6394
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6395
                                                  instance.hypervisor)
6396
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6397
                                         instance.hypervisor)
6398
      pninfo = nodeinfo[pnode]
6399
      msg = pninfo.fail_msg
6400
      if msg:
6401
        # Assume the primary node is unreachable and go ahead
6402
        self.warn.append("Can't get info from primary node %s: %s" %
6403
                         (pnode, msg))
6404
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
6405
        self.warn.append("Node data from primary node %s doesn't contain"
6406
                         " free memory information" % pnode)
6407
      elif instance_info.fail_msg:
6408
        self.warn.append("Can't get instance runtime information: %s" %
6409
                        instance_info.fail_msg)
6410
      else:
6411
        if instance_info.payload:
6412
          current_mem = int(instance_info.payload['memory'])
6413
        else:
6414
          # Assume instance not running
6415
          # (there is a slight race condition here, but it's not very probable,
6416
          # and we have no other way to check)
6417
          current_mem = 0
6418
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6419
                    pninfo.payload['memory_free'])
6420
        if miss_mem > 0:
6421
          raise errors.OpPrereqError("This change will prevent the instance"
6422
                                     " from starting, due to %d MB of memory"
6423
                                     " missing on its primary node" % miss_mem)
6424

    
6425
      if be_new[constants.BE_AUTO_BALANCE]:
6426
        for node, nres in nodeinfo.items():
6427
          if node not in instance.secondary_nodes:
6428
            continue
6429
          msg = nres.fail_msg
6430
          if msg:
6431
            self.warn.append("Can't get info from secondary node %s: %s" %
6432
                             (node, msg))
6433
          elif not isinstance(nres.payload.get('memory_free', None), int):
6434
            self.warn.append("Secondary node %s didn't return free"
6435
                             " memory information" % node)
6436
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
6437
            self.warn.append("Not enough memory to failover instance to"
6438
                             " secondary node %s" % node)
6439

    
6440
    # NIC processing
6441
    self.nic_pnew = {}
6442
    self.nic_pinst = {}
6443
    for nic_op, nic_dict in self.op.nics:
6444
      if nic_op == constants.DDM_REMOVE:
6445
        if not instance.nics:
6446
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6447
        continue
6448
      if nic_op != constants.DDM_ADD:
6449
        # an existing nic
6450
        if nic_op < 0 or nic_op >= len(instance.nics):
6451
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6452
                                     " are 0 to %d" %
6453
                                     (nic_op, len(instance.nics)))
6454
        old_nic_params = instance.nics[nic_op].nicparams
6455
        old_nic_ip = instance.nics[nic_op].ip
6456
      else:
6457
        old_nic_params = {}
6458
        old_nic_ip = None
6459

    
6460
      update_params_dict = dict([(key, nic_dict[key])
6461
                                 for key in constants.NICS_PARAMETERS
6462
                                 if key in nic_dict])
6463

    
6464
      if 'bridge' in nic_dict:
6465
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6466

    
6467
      new_nic_params, new_filled_nic_params = \
6468
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6469
                                 cluster.nicparams[constants.PP_DEFAULT],
6470
                                 constants.NICS_PARAMETER_TYPES)
6471
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6472
      self.nic_pinst[nic_op] = new_nic_params
6473
      self.nic_pnew[nic_op] = new_filled_nic_params
6474
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6475

    
6476
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6477
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6478
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
6479
        if msg:
6480
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
6481
          if self.force:
6482
            self.warn.append(msg)
6483
          else:
6484
            raise errors.OpPrereqError(msg)
6485
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6486
        if 'ip' in nic_dict:
6487
          nic_ip = nic_dict['ip']
6488
        else:
6489
          nic_ip = old_nic_ip
6490
        if nic_ip is None:
6491
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6492
                                     ' on a routed nic')
6493
      if 'mac' in nic_dict:
6494
        nic_mac = nic_dict['mac']
6495
        if nic_mac is None:
6496
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6497
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6498
          # otherwise generate the mac
6499
          nic_dict['mac'] = self.cfg.GenerateMAC()
6500
        else:
6501
          # or validate/reserve the current one
6502
          if self.cfg.IsMacInUse(nic_mac):
6503
            raise errors.OpPrereqError("MAC address %s already in use"
6504
                                       " in cluster" % nic_mac)
6505

    
6506
    # DISK processing
6507
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6508
      raise errors.OpPrereqError("Disk operations not supported for"
6509
                                 " diskless instances")
6510
    for disk_op, disk_dict in self.op.disks:
6511
      if disk_op == constants.DDM_REMOVE:
6512
        if len(instance.disks) == 1:
6513
          raise errors.OpPrereqError("Cannot remove the last disk of"
6514
                                     " an instance")
6515
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6516
        ins_l = ins_l[pnode]
6517
        msg = ins_l.fail_msg
6518
        if msg:
6519
          raise errors.OpPrereqError("Can't contact node %s: %s" %
6520
                                     (pnode, msg))
6521
        if instance.name in ins_l.payload:
6522
          raise errors.OpPrereqError("Instance is running, can't remove"
6523
                                     " disks.")
6524

    
6525
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
6527
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6528
                                   " add more" % constants.MAX_DISKS)
6529
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6530
        # an existing disk
6531
        if disk_op < 0 or disk_op >= len(instance.disks):
6532
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6533
                                     " are 0 to %d" %
6534
                                     (disk_op, len(instance.disks)))
6535

    
6536
    return
6537

    
6538
  def Exec(self, feedback_fn):
6539
    """Modifies an instance.
6540

6541
    All parameters take effect only at the next restart of the instance.
6542

6543
    """
6544
    # Process here the warnings from CheckPrereq, as we don't have a
6545
    # feedback_fn there.
6546
    for warn in self.warn:
6547
      feedback_fn("WARNING: %s" % warn)
6548

    
6549
    result = []
6550
    instance = self.instance
6551
    cluster = self.cluster
6552
    # disk changes
6553
    for disk_op, disk_dict in self.op.disks:
6554
      if disk_op == constants.DDM_REMOVE:
6555
        # remove the last disk
6556
        device = instance.disks.pop()
6557
        device_idx = len(instance.disks)
6558
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6559
          self.cfg.SetDiskID(disk, node)
6560
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
6561
          if msg:
6562
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6563
                            " continuing anyway", device_idx, node, msg)
6564
        result.append(("disk/%d" % device_idx, "remove"))
6565
      elif disk_op == constants.DDM_ADD:
6566
        # add a new disk
6567
        if instance.disk_template == constants.DT_FILE:
6568
          file_driver, file_path = instance.disks[0].logical_id
6569
          file_path = os.path.dirname(file_path)
6570
        else:
6571
          file_driver = file_path = None
6572
        disk_idx_base = len(instance.disks)
6573
        new_disk = _GenerateDiskTemplate(self,
6574
                                         instance.disk_template,
6575
                                         instance.name, instance.primary_node,
6576
                                         instance.secondary_nodes,
6577
                                         [disk_dict],
6578
                                         file_path,
6579
                                         file_driver,
6580
                                         disk_idx_base)[0]
6581
        instance.disks.append(new_disk)
6582
        info = _GetInstanceInfoText(instance)
6583

    
6584
        logging.info("Creating volume %s for instance %s",
6585
                     new_disk.iv_name, instance.name)
6586
        # Note: this needs to be kept in sync with _CreateDisks
6587
        #HARDCODE
6588
        for node in instance.all_nodes:
6589
          f_create = node == instance.primary_node
6590
          try:
6591
            _CreateBlockDev(self, node, instance, new_disk,
6592
                            f_create, info, f_create)
6593
          except errors.OpExecError, err:
6594
            self.LogWarning("Failed to create volume %s (%s) on"
6595
                            " node %s: %s",
6596
                            new_disk.iv_name, new_disk, node, err)
6597
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6598
                       (new_disk.size, new_disk.mode)))
6599
      else:
6600
        # change a given disk
6601
        instance.disks[disk_op].mode = disk_dict['mode']
6602
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6603
    # NIC changes
6604
    for nic_op, nic_dict in self.op.nics:
6605
      if nic_op == constants.DDM_REMOVE:
6606
        # remove the last nic
6607
        del instance.nics[-1]
6608
        result.append(("nic.%d" % len(instance.nics), "remove"))
6609
      elif nic_op == constants.DDM_ADD:
6610
        # mac and bridge should be set, by now
6611
        mac = nic_dict['mac']
6612
        ip = nic_dict.get('ip', None)
6613
        nicparams = self.nic_pinst[constants.DDM_ADD]
6614
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6615
        instance.nics.append(new_nic)
6616
        result.append(("nic.%d" % (len(instance.nics) - 1),
6617
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6618
                       (new_nic.mac, new_nic.ip,
6619
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6620
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6621
                       )))
6622
      else:
6623
        for key in 'mac', 'ip':
6624
          if key in nic_dict:
6625
            setattr(instance.nics[nic_op], key, nic_dict[key])
6626
        if nic_op in self.nic_pnew:
6627
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6628
        for key, val in nic_dict.iteritems():
6629
          result.append(("nic.%s/%d" % (key, nic_op), val))
6630

    
6631
    # hvparams changes
6632
    if self.op.hvparams:
6633
      instance.hvparams = self.hv_inst
6634
      for key, val in self.op.hvparams.iteritems():
6635
        result.append(("hv/%s" % key, val))
6636

    
6637
    # beparams changes
6638
    if self.op.beparams:
6639
      instance.beparams = self.be_inst
6640
      for key, val in self.op.beparams.iteritems():
6641
        result.append(("be/%s" % key, val))
6642

    
6643
    self.cfg.Update(instance)
6644

    
6645
    return result
6646

    
6647

    
6648
class LUQueryExports(NoHooksLU):
6649
  """Query the exports list
6650

6651
  """
6652
  _OP_REQP = ['nodes']
6653
  REQ_BGL = False
6654

    
6655
  def ExpandNames(self):
6656
    self.needed_locks = {}
6657
    self.share_locks[locking.LEVEL_NODE] = 1
6658
    if not self.op.nodes:
6659
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6660
    else:
6661
      self.needed_locks[locking.LEVEL_NODE] = \
6662
        _GetWantedNodes(self, self.op.nodes)
6663

    
6664
  def CheckPrereq(self):
6665
    """Check prerequisites.
6666

6667
    """
6668
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6669

    
6670
  def Exec(self, feedback_fn):
6671
    """Compute the list of all the exported system images.
6672

6673
    @rtype: dict
6674
    @return: a dictionary with the structure node->(export-list)
6675
        where export-list is a list of the instances exported on
6676
        that node.
6677

6678
    """
6679
    rpcresult = self.rpc.call_export_list(self.nodes)
6680
    result = {}
6681
    for node in rpcresult:
6682
      if rpcresult[node].fail_msg:
6683
        result[node] = False
6684
      else:
6685
        result[node] = rpcresult[node].payload
6686

    
6687
    return result
6688

    
6689

    
6690
class LUExportInstance(LogicalUnit):
6691
  """Export an instance to an image in the cluster.
6692

6693
  """
6694
  HPATH = "instance-export"
6695
  HTYPE = constants.HTYPE_INSTANCE
6696
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6697
  REQ_BGL = False
6698

    
6699
  def ExpandNames(self):
6700
    self._ExpandAndLockInstance()
6701
    # FIXME: lock only instance primary and destination node
6702
    #
6703
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
6706
    #  - making a tasklet to search (share-lock all), then create the new one,
6707
    #    then one to remove, after
6708
    #  - removing the removal operation altogether
6709
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6710

    
6711
  def DeclareLocks(self, level):
6712
    """Last minute lock declaration."""
6713
    # All nodes are locked anyway, so nothing to do here.
6714

    
6715
  def BuildHooksEnv(self):
6716
    """Build hooks env.
6717

6718
    This will run on the master, primary node and target node.
6719

6720
    """
6721
    env = {
6722
      "EXPORT_NODE": self.op.target_node,
6723
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6724
      }
6725
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6726
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6727
          self.op.target_node]
6728
    return env, nl, nl
6729

    
6730
  def CheckPrereq(self):
6731
    """Check prerequisites.
6732

6733
    This checks that the instance and node names are valid.
6734

6735
    """
6736
    instance_name = self.op.instance_name
6737
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6738
    assert self.instance is not None, \
6739
          "Cannot retrieve locked instance %s" % self.op.instance_name
6740
    _CheckNodeOnline(self, self.instance.primary_node)
6741

    
6742
    self.dst_node = self.cfg.GetNodeInfo(
6743
      self.cfg.ExpandNodeName(self.op.target_node))
6744

    
6745
    if self.dst_node is None:
6746
      # This is wrong node name, not a non-locked node
6747
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6748
    _CheckNodeOnline(self, self.dst_node.name)
6749
    _CheckNodeNotDrained(self, self.dst_node.name)
6750

    
6751
    # instance disk type verification
6752
    for disk in self.instance.disks:
6753
      if disk.dev_type == constants.LD_FILE:
6754
        raise errors.OpPrereqError("Export not supported for instances with"
6755
                                   " file-based disks")
6756

    
6757
  def Exec(self, feedback_fn):
6758
    """Export an instance to an image in the cluster.
6759

6760
    """
6761
    instance = self.instance
6762
    dst_node = self.dst_node
6763
    src_node = instance.primary_node
6764
    if self.op.shutdown:
6765
      # shutdown the instance, but not the disks
6766
      result = self.rpc.call_instance_shutdown(src_node, instance)
6767
      result.Raise("Could not shutdown instance %s on"
6768
                   " node %s" % (instance.name, src_node))
6769

    
6770
    vgname = self.cfg.GetVGName()
6771

    
6772
    snap_disks = []
6773

    
6774
    # set the disks ID correctly since call_instance_start needs the
6775
    # correct drbd minor to create the symlinks
6776
    for disk in instance.disks:
6777
      self.cfg.SetDiskID(disk, src_node)
6778

    
6779
    try:
6780
      for idx, disk in enumerate(instance.disks):
6781
        # result.payload will be a snapshot of an lvm leaf of the one we passed
6782
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
6783
        msg = result.fail_msg
6784
        if msg:
6785
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
6786
                          idx, src_node, msg)
6787
          snap_disks.append(False)
6788
        else:
6789
          disk_id = (vgname, result.payload)
6790
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6791
                                 logical_id=disk_id, physical_id=disk_id,
6792
                                 iv_name=disk.iv_name)
6793
          snap_disks.append(new_dev)
6794

    
6795
    finally:
6796
      if self.op.shutdown and instance.admin_up:
6797
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6798
        msg = result.fail_msg
6799
        if msg:
6800
          _ShutdownInstanceDisks(self, instance)
6801
          raise errors.OpExecError("Could not start instance: %s" % msg)
6802

    
6803
    # TODO: check for size
6804

    
6805
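    # copy each snapshot to the destination node, then remove the
    # temporary snapshot LV from the source node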
    cluster_name = self.cfg.GetClusterName()
6806
    for idx, dev in enumerate(snap_disks):
6807
      if dev:
6808
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6809
                                               instance, cluster_name, idx)
6810
        msg = result.fail_msg
6811
        if msg:
6812
          self.LogWarning("Could not export disk/%s from node %s to"
6813
                          " node %s: %s", idx, src_node, dst_node.name, msg)
6814
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
6815
        if msg:
6816
          self.LogWarning("Could not remove snapshot for disk/%d from node"
6817
                          " %s: %s", idx, src_node, msg)
6818

    
6819
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6820
    msg = result.fail_msg
6821
    if msg:
6822
      self.LogWarning("Could not finalize export for instance %s"
6823
                      " on node %s: %s", instance.name, dst_node.name, msg)
6824

    
6825
    nodelist = self.cfg.GetNodeList()
6826
    nodelist.remove(dst_node.name)
6827

    
6828
    # on one-node clusters nodelist will be empty after the removal
6829
    # if we proceed the backup would be removed because OpQueryExports
6830
    # substitutes an empty list with the full cluster node list.
6831
    iname = instance.name
6832
    if nodelist:
6833
      exportlist = self.rpc.call_export_list(nodelist)
6834
      for node in exportlist:
6835
        if exportlist[node].fail_msg:
6836
          continue
6837
        if iname in exportlist[node].payload:
6838
          msg = self.rpc.call_export_remove(node, iname).fail_msg
6839
          if msg:
6840
            self.LogWarning("Could not remove older export for instance %s"
6841
                            " on node %s: %s", iname, node, msg)
6842

    
6843

    
6844
class LURemoveExport(NoHooksLU):
6845
  """Remove exports related to the named instance.
6846

6847
  """
6848
  _OP_REQP = ["instance_name"]
6849
  REQ_BGL = False
6850

    
6851
  def ExpandNames(self):
6852
    self.needed_locks = {}
6853
    # We need all nodes to be locked in order for RemoveExport to work, but we
6854
    # don't need to lock the instance itself, as nothing will happen to it (and
6855
    # we can also remove exports for a removed instance)
6856
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6857

    
6858
  def CheckPrereq(self):
6859
    """Check prerequisites.
6860
    """
6861
    pass
6862

    
6863
  def Exec(self, feedback_fn):
6864
    """Remove any export.
6865

6866
    """
6867
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6868
    # If the instance was not found we'll try with the name that was passed in.
6869
    # This will only work if it was an FQDN, though.
6870
    fqdn_warn = False
6871
    if not instance_name:
6872
      fqdn_warn = True
6873
      instance_name = self.op.instance_name
6874

    
6875
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6876
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name
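    # Note: there is no branch for constants.TAG_CLUSTER here, since cluster
    # tags need neither name expansion nor a specific lock; the target
    # object itself is resolved in CheckPrereq below.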

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False
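  # Note (assumption, for illustration only): this LU is typically driven by
  # a debugging command such as "gnt-debug delay", which makes it handy for
  # exercising the job queue and the node RPC layer without side effects.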

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), which represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]
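  # Illustrative sketch (hypothetical values) of building an allocation
  # request; every keyword argument must be a member of _ALLO_KEYS:
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    "inst1.example.com", mem_size=512, vcpus=1,
  #                    disks=[{"size": 1024, "mode": "w"}],
  #                    disk_template=constants.DT_DRBD8, os="debian-etch",
  #                    tags=[], nics=[{"mac": "auto", "ip": None,
  #                                    "bridge": None}],
  #                    hypervisor=None)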

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
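    # At this point in_data contains the operation-independent part of the
    # structure: "version", "cluster_name", "cluster_tags",
    # "enabled_hypervisors", "nodes" and "instances"; the mode-specific
    # "request" key is added later by _AddNewInstance/_AddRelocateInstance.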

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node")
7339

    
7340
    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict
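    # Illustrative example (hypothetical content) of a reply that passes the
    # checks above, as it would appear in out_text before deserialization:
    #   {"success": true, "info": "allocation successful",
    #    "nodes": ["node1.example.com", "node2.example.com"]}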


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
7413
  _OP_REQP = ["direction", "mode", "name"]
7414

    
7415
  def CheckPrereq(self):
7416
    """Check prerequisites.
7417

7418
    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
7421
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
7422
      for attr in ["name", "mem_size", "disks", "disk_template",
7423
                   "os", "tags", "nics", "vcpus"]:
7424
        if not hasattr(self.op, attr):
7425
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
7426
                                     attr)
7427
      iname = self.cfg.ExpandInstanceName(self.op.name)
7428
      if iname is not None:
7429
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
7430
                                   iname)
7431
      if not isinstance(self.op.nics, list):
7432
        raise errors.OpPrereqError("Invalid parameter 'nics'")
7433
      for row in self.op.nics:
7434
        if (not isinstance(row, dict) or
7435
            "mac" not in row or
7436
            "ip" not in row or
7437
            "bridge" not in row):
7438
          raise errors.OpPrereqError("Invalid contents of the"
7439
                                     " 'nics' parameter")
7440
      if not isinstance(self.op.disks, list):
7441
        raise errors.OpPrereqError("Invalid parameter 'disks'")
7442
      for row in self.op.disks:
7443
        if (not isinstance(row, dict) or
7444
            "size" not in row or
7445
            not isinstance(row["size"], int) or
7446
            "mode" not in row or
7447
            row["mode"] not in ['r', 'w']):
7448
          raise errors.OpPrereqError("Invalid contents of the"
7449
                                     " 'disks' parameter")
7450
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
7451
        self.op.hypervisor = self.cfg.GetHypervisorType()
7452
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
7453
      if not hasattr(self.op, "name"):
7454
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
7455
      fname = self.cfg.ExpandInstanceName(self.op.name)
7456
      if fname is None:
7457
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
7458
                                   self.op.name)
7459
      self.op.name = fname
7460
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
7461
    else:
7462
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
7463
                                 self.op.mode)
7464

    
7465
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
7466
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
7467
        raise errors.OpPrereqError("Missing allocator name")
7468
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
7469
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
7470
                                 self.op.direction)
7471

    
7472
  def Exec(self, feedback_fn):
7473
    """Run the allocator test.
7474

7475
    """
7476
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
7477
      ial = IAllocator(self.cfg, self.rpc,
7478
                       mode=self.op.mode,
7479
                       name=self.op.name,
7480
                       mem_size=self.op.mem_size,
7481
                       disks=self.op.disks,
7482
                       disk_template=self.op.disk_template,
7483
                       os=self.op.os,
7484
                       tags=self.op.tags,
7485
                       nics=self.op.nics,
7486
                       vcpus=self.op.vcpus,
7487
                       hypervisor=self.op.hypervisor,
7488
                       )
7489
    else:
7490
      ial = IAllocator(self.cfg, self.rpc,
7491
                       mode=self.op.mode,
7492
                       name=self.op.name,
7493
                       relocate_from=list(self.relocate_from),
7494
                       )
7495

    
7496
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
7497
      result = ial.in_text
7498
    else:
7499
      ial.Run(self.op.allocator, validate=False)
7500
      result = ial.out_text
7501
    return result