
root / lib / cmdlib.py @ abae1b2b


#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this will
    be handled in the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to return, an empty list (and not None) should be
    used.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


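# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original module): a minimal
# concurrent LU wiring together the locking hooks described above, assuming a
# hypothetical opcode that carries an "instance_name" parameter. It only
# demonstrates the ExpandNames/DeclareLocks plumbing; real LUs follow below.
class _LULockingSketch(LogicalUnit):
  HPATH = None
  HTYPE = None
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # expand the instance name and declare the instance-level lock
    self._ExpandAndLockInstance()
    # node locks can only be computed once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("instance %s locked together with nodes %s" %
                (self.instance.name,
                 self.needed_locks[locking.LEVEL_NODE]))

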
class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


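# Editor's illustrative sketch (not part of the original module): a no-op
# tasklet. An LU would create instances of such a class in ExpandNames, e.g.
# "self.tasklets = [_TaskletSketch(self)]", and then rely on the CheckPrereq
# and Exec loops implemented in LogicalUnit above instead of overriding them.
class _TaskletSketch(Tasklet):
  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    feedback_fn("tasklet sketch: nothing to do")

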
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


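# Editor's illustrative sketch (not part of the original module): how a
# query-style LU would typically use _CheckOutputFields from CheckArguments
# or CheckPrereq; the field names here are hypothetical.
def _ExampleCheckFieldsSketch(op_output_fields):
  _CheckOutputFields(static=utils.FieldSet("name", "pnode"),
                     dynamic=utils.FieldSet("oper_state"),
                     selected=op_output_fields)

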
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env

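# Editor's illustrative sketch (not part of the original module): building a
# hook environment for a hypothetical single-NIC, single-disk instance; all
# values below are made up and only show the expected argument shapes.
def _ExampleHookEnvSketch():
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                               ["node2.example.com"], "debian-etch", True,
                               128, 1,
                               [("198.51.100.10", "aa:00:00:11:22:33",
                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
                               "drbd", [(10240, "rw")], {}, {}, "xen-pvm")
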
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


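# Editor's note (illustrative, not part of the original module): the keys of
# the "override" dict are the *argument names* of _BuildInstanceHookEnv (e.g.
# 'status' or 'os_type'), not the final INSTANCE_* environment keys. A minimal
# hypothetical use:
def _ExampleHookEnvOverrideSketch(lu, instance):
  # report the instance as stopped regardless of its recorded state
  return _BuildInstanceHookEnvByObject(lu, instance, override={"status": False})

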
def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  instances = []

  for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
    if node_name == inst.primary_node:
      instances.append(inst)

  return instances


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  instances = []

  for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
    if node_name in inst.secondary_nodes:
      instances.append(inst)

  return instances


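# Editor's illustrative sketch (not part of the original module): combining
# the two helpers above to list every instance that touches a node, primaries
# first.
def _ExampleNodeInstancesSketch(cfg, node_name):
  return (_GetNodePrimaryInstances(cfg, node_name) +
          _GetNodeSecondaryInstances(cfg, node_name))

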
class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates (and the file is outdated)" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to, should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase; their failure makes the
    output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      if msg:
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
        bad = True
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        if instance not in instanceinfo:
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
                      instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      result = self._VerifyNode(node_i, file_names, local_checksums,
                                nresult, feedback_fn, master_files,
                                node_drbd, vg_name)
      bad = bad or result

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, utils.SafeEncode(lvdata)))
        bad = True
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      if not isinstance(idata, list):
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
                    (node,))
        bad = True
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          if (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST]):
            feedback_fn("  - ERROR: node %s didn't return data for the"
                        " volume group '%s' - it is either missing or broken" %
                        (node, vg_name))
            bad = True
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
                    " from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      result =  self._VerifyInstance(instance, inst_config, node_volume,
                                     node_instance, feedback_fn, n_offline)
      bad = bad or result
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      elif pnode not in n_offline:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        elif snode not in n_offline:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))
          bad = True
        if snode in n_offline:
          inst_nodes_offline.append(snode)

      if inst_nodes_offline:
        # warn that the instance lives on offline nodes, and set bad=True
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
                    ", ".join(inst_nodes_offline))
        bad = True

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          msg = res.fail_msg
          if msg:
            if res.offline:
              # no need to warn or set fail return value
              continue
            feedback_fn("    Communication failure in hooks execution: %s" %
                        msg)
            lu_result = 1
            continue
          for script, hkr, output in res.payload:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result


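# Editor's illustrative sketch (not part of the original module): the shape of
# the per-node accounting dict built by LUVerifyCluster.Exec, for a single
# hypothetical node; all names and numbers are made up.
_EXAMPLE_NODE_INFO_SKETCH = {
  "node1.example.com": {
    "mfree": 2048,      # free memory reported by the hypervisor, in MiB
    "dfree": 51200,     # free space in the volume group, in MiB
    "pinst": ["inst1.example.com"],   # instances with this node as primary
    "sinst": ["inst2.example.com"],   # instances with this node as secondary
    # secondary instances grouped by their primary node, as used by the
    # N+1 memory check in _VerifyNPlusOneMemory
    "sinst-by-pnode": {"node2.example.com": ["inst2.example.com"]},
  },
}

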
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


1460
class LURenameCluster(LogicalUnit):
1461
  """Rename the cluster.
1462

1463
  """
1464
  HPATH = "cluster-rename"
1465
  HTYPE = constants.HTYPE_CLUSTER
1466
  _OP_REQP = ["name"]
1467

    
1468
  def BuildHooksEnv(self):
1469
    """Build hooks env.
1470

1471
    """
1472
    env = {
1473
      "OP_TARGET": self.cfg.GetClusterName(),
1474
      "NEW_NAME": self.op.name,
1475
      }
1476
    mn = self.cfg.GetMasterNode()
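    # the rename hooks run only on the master node, both pre and post phase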
1477
    return env, [mn], [mn]
1478

    
1479
  def CheckPrereq(self):
1480
    """Verify that the passed name is a valid one.
1481

1482
    """
1483
    hostname = utils.HostInfo(self.op.name)
1484

    
1485
    new_name = hostname.name
1486
    self.ip = new_ip = hostname.ip
1487
    old_name = self.cfg.GetClusterName()
1488
    old_ip = self.cfg.GetMasterIP()
1489
    if new_name == old_name and new_ip == old_ip:
1490
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1491
                                 " cluster has changed")
1492
    if new_ip != old_ip:
1493
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1494
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1495
                                   " reachable on the network. Aborting." %
1496
                                   new_ip)
1497

    
1498
    self.op.name = new_name
1499

    
1500
  def Exec(self, feedback_fn):
1501
    """Rename the cluster.
1502

1503
    """
1504
    clustername = self.op.name
1505
    ip = self.ip
1506

    
1507
    # shutdown the master IP
1508
    master = self.cfg.GetMasterNode()
1509
    result = self.rpc.call_node_stop_master(master, False)
1510
    result.Raise("Could not disable the master role")
1511

    
1512
    try:
1513
      cluster = self.cfg.GetClusterInfo()
1514
      cluster.cluster_name = clustername
1515
      cluster.master_ip = ip
1516
      self.cfg.Update(cluster)
1517

    
1518
      # update the known hosts file
1519
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1520
      node_list = self.cfg.GetNodeList()
1521
      try:
1522
        node_list.remove(master)
1523
      except ValueError:
1524
        pass
1525
      result = self.rpc.call_upload_file(node_list,
1526
                                         constants.SSH_KNOWN_HOSTS_FILE)
1527
      for to_node, to_result in result.iteritems():
1528
        msg = to_result.fail_msg
1529
        if msg:
1530
          msg = ("Copy of file %s to node %s failed: %s" %
1531
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1532
          self.proc.LogWarning(msg)
1533

    
1534
    finally:
1535
      result = self.rpc.call_node_start_master(master, False, False)
1536
      msg = result.fail_msg
1537
      if msg:
1538
        self.LogWarning("Could not re-enable the master role on"
1539
                        " the master, please restart manually: %s", msg)
1540

    
1541

    
1542
def _RecursiveCheckIfLVMBased(disk):
1543
  """Check if the given disk or its children are lvm-based.
1544

1545
  @type disk: L{objects.Disk}
1546
  @param disk: the disk to check
1547
  @rtype: boolean
1548
  @return: boolean indicating whether a LD_LV dev_type was found or not
1549

1550
  """
1551
  if disk.children:
1552
    for chdisk in disk.children:
1553
      if _RecursiveCheckIfLVMBased(chdisk):
1554
        return True
1555
  return disk.dev_type == constants.LD_LV
1556

    
1557

    
1558
class LUSetClusterParams(LogicalUnit):
1559
  """Change the parameters of the cluster.
1560

1561
  """
1562
  HPATH = "cluster-modify"
1563
  HTYPE = constants.HTYPE_CLUSTER
1564
  _OP_REQP = []
1565
  REQ_BGL = False
1566

    
1567
  def CheckArguments(self):
1568
    """Check parameters
1569

1570
    """
1571
    if not hasattr(self.op, "candidate_pool_size"):
1572
      self.op.candidate_pool_size = None
1573
    if self.op.candidate_pool_size is not None:
1574
      try:
1575
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1576
      except (ValueError, TypeError), err:
1577
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1578
                                   str(err))
1579
      if self.op.candidate_pool_size < 1:
1580
        raise errors.OpPrereqError("At least one master candidate needed")
1581

    
1582
  def ExpandNames(self):
1583
    # FIXME: in the future maybe other cluster params won't require checking on
1584
    # all nodes to be modified.
1585
    self.needed_locks = {
1586
      locking.LEVEL_NODE: locking.ALL_SET,
1587
    }
1588
    self.share_locks[locking.LEVEL_NODE] = 1
1589

    
1590
  def BuildHooksEnv(self):
1591
    """Build hooks env.
1592

1593
    """
1594
    env = {
1595
      "OP_TARGET": self.cfg.GetClusterName(),
1596
      "NEW_VG_NAME": self.op.vg_name,
1597
      }
1598
    mn = self.cfg.GetMasterNode()
1599
    return env, [mn], [mn]
1600

    
1601
  def CheckPrereq(self):
1602
    """Check prerequisites.
1603

1604
    This checks whether the given params don't conflict and
1605
    if the given volume group is valid.
1606

1607
    """
1608
    if self.op.vg_name is not None and not self.op.vg_name:
1609
      instances = self.cfg.GetAllInstancesInfo().values()
1610
      for inst in instances:
1611
        for disk in inst.disks:
1612
          if _RecursiveCheckIfLVMBased(disk):
1613
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1614
                                       " lvm-based instances exist")
1615

    
1616
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1617

    
1618
    # if vg_name not None, checks given volume group on all nodes
1619
    if self.op.vg_name:
1620
      vglist = self.rpc.call_vg_list(node_list)
1621
      for node in node_list:
1622
        msg = vglist[node].fail_msg
1623
        if msg:
1624
          # ignoring down node
1625
          self.LogWarning("Error while gathering data on node %s"
1626
                          " (ignoring node): %s", node, msg)
1627
          continue
1628
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1629
                                              self.op.vg_name,
1630
                                              constants.MIN_VG_SIZE)
1631
        if vgstatus:
1632
          raise errors.OpPrereqError("Error on node '%s': %s" %
1633
                                     (node, vgstatus))
1634

    
1635
    self.cluster = cluster = self.cfg.GetClusterInfo()
1636
    # validate params changes
1637
    if self.op.beparams:
1638
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1639
      self.new_beparams = objects.FillDict(
1640
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1641

    
1642
    if self.op.nicparams:
1643
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1644
      self.new_nicparams = objects.FillDict(
1645
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1646
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1647

    
1648
    # hypervisor list/parameters
1649
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
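    # start from a copy of the current per-hypervisor parameters and
    # merge the requested overrides into it below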
1650
    if self.op.hvparams:
1651
      if not isinstance(self.op.hvparams, dict):
1652
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1653
      for hv_name, hv_dict in self.op.hvparams.items():
1654
        if hv_name not in self.new_hvparams:
1655
          self.new_hvparams[hv_name] = hv_dict
1656
        else:
1657
          self.new_hvparams[hv_name].update(hv_dict)
1658

    
1659
    if self.op.enabled_hypervisors is not None:
1660
      self.hv_list = self.op.enabled_hypervisors
1661
      if not self.hv_list:
1662
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1663
                                   " least one member")
1664
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1665
      if invalid_hvs:
1666
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1667
                                   " entries: %s" % invalid_hvs)
1668
    else:
1669
      self.hv_list = cluster.enabled_hypervisors
1670

    
1671
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1672
      # either the enabled list has changed, or the parameters have, validate
1673
      for hv_name, hv_params in self.new_hvparams.items():
1674
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1675
            (self.op.enabled_hypervisors and
1676
             hv_name in self.op.enabled_hypervisors)):
1677
          # either this is a new hypervisor, or its parameters have changed
1678
          hv_class = hypervisor.GetHypervisor(hv_name)
1679
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1680
          hv_class.CheckParameterSyntax(hv_params)
1681
          _CheckHVParams(self, node_list, hv_name, hv_params)
1682

    
1683
  def Exec(self, feedback_fn):
1684
    """Change the parameters of the cluster.
1685

1686
    """
1687
    if self.op.vg_name is not None:
1688
      new_volume = self.op.vg_name
1689
      if not new_volume:
1690
        new_volume = None
1691
      if new_volume != self.cfg.GetVGName():
1692
        self.cfg.SetVGName(new_volume)
1693
      else:
1694
        feedback_fn("Cluster LVM configuration already in desired"
1695
                    " state, not changing")
1696
    if self.op.hvparams:
1697
      self.cluster.hvparams = self.new_hvparams
1698
    if self.op.enabled_hypervisors is not None:
1699
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1700
    if self.op.beparams:
1701
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1702
    if self.op.nicparams:
1703
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1704

    
1705
    if self.op.candidate_pool_size is not None:
1706
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1707
      # we need to update the pool size here, otherwise the save will fail
1708
      _AdjustCandidatePool(self)
1709

    
1710
    self.cfg.Update(self.cluster)
1711

    
1712

    
1713
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1714
  """Distribute additional files which are part of the cluster configuration.
1715

1716
  ConfigWriter takes care of distributing the config and ssconf files, but
1717
  there are more files which should be distributed to all nodes. This function
1718
  makes sure those are copied.
1719

1720
  @param lu: calling logical unit
1721
  @param additional_nodes: list of nodes not in the config to distribute to
1722

1723
  """
1724
  # 1. Gather target nodes
1725
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1726
  dist_nodes = lu.cfg.GetNodeList()
1727
  if additional_nodes is not None:
1728
    dist_nodes.extend(additional_nodes)
1729
  if myself.name in dist_nodes:
1730
    dist_nodes.remove(myself.name)
1731
  # 2. Gather files to distribute
1732
  dist_files = set([constants.ETC_HOSTS,
1733
                    constants.SSH_KNOWN_HOSTS_FILE,
1734
                    constants.RAPI_CERT_FILE,
1735
                    constants.RAPI_USERS_FILE,
1736
                    constants.HMAC_CLUSTER_KEY,
1737
                   ])
1738

    
1739
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
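  # each enabled hypervisor may declare additional ancillary files that
  # have to be present on every node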
1740
  for hv_name in enabled_hypervisors:
1741
    hv_class = hypervisor.GetHypervisor(hv_name)
1742
    dist_files.update(hv_class.GetAncillaryFiles())
1743

    
1744
  # 3. Perform the files upload
1745
  for fname in dist_files:
1746
    if os.path.exists(fname):
1747
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1748
      for to_node, to_result in result.items():
1749
        msg = to_result.fail_msg
1750
        if msg:
1751
          msg = ("Copy of file %s to node %s failed: %s" %
1752
                 (fname, to_node, msg))
1753
          lu.proc.LogWarning(msg)
1754

    
1755

    
1756
class LURedistributeConfig(NoHooksLU):
1757
  """Force the redistribution of cluster configuration.
1758

1759
  This is a very simple LU.
1760

1761
  """
1762
  _OP_REQP = []
1763
  REQ_BGL = False
1764

    
1765
  def ExpandNames(self):
1766
    self.needed_locks = {
1767
      locking.LEVEL_NODE: locking.ALL_SET,
1768
    }
1769
    self.share_locks[locking.LEVEL_NODE] = 1
1770

    
1771
  def CheckPrereq(self):
1772
    """Check prerequisites.
1773

1774
    """
1775

    
1776
  def Exec(self, feedback_fn):
1777
    """Redistribute the configuration.
1778

1779
    """
1780
    self.cfg.Update(self.cfg.GetClusterInfo())
1781
    _RedistributeAncillaryFiles(self)
1782

    
1783

    
1784
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1785
  """Sleep and poll for an instance's disk to sync.
1786

1787
  """
1788
  if not instance.disks:
1789
    return True
1790

    
1791
  if not oneshot:
1792
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1793

    
1794
  node = instance.primary_node
1795

    
1796
  for dev in instance.disks:
1797
    lu.cfg.SetDiskID(dev, node)
1798

    
1799
  retries = 0
1800
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1801
  while True:
1802
    max_time = 0
1803
    done = True
1804
    cumul_degraded = False
1805
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1806
    msg = rstats.fail_msg
1807
    if msg:
1808
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1809
      retries += 1
1810
      if retries >= 10:
1811
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1812
                                 " aborting." % node)
1813
      time.sleep(6)
1814
      continue
1815
    rstats = rstats.payload
1816
    retries = 0
1817
    for i, mstat in enumerate(rstats):
1818
      if mstat is None:
1819
        lu.LogWarning("Can't compute data for node %s/%s",
1820
                           node, instance.disks[i].iv_name)
1821
        continue
1822
      # we ignore the ldisk parameter
1823
      perc_done, est_time, is_degraded, _ = mstat
1824
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1825
      if perc_done is not None:
1826
        done = False
1827
        if est_time is not None:
1828
          rem_time = "%d estimated seconds remaining" % est_time
1829
          max_time = est_time
1830
        else:
1831
          rem_time = "no time estimate"
1832
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1833
                        (instance.disks[i].iv_name, perc_done, rem_time))
1834

    
1835
    # if we're done but degraded, let's do a few small retries, to
1836
    # make sure we see a stable and not transient situation; therefore
1837
    # we force restart of the loop
1838
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1839
      logging.info("Degraded disks found, %d retries left", degr_retries)
1840
      degr_retries -= 1
1841
      time.sleep(1)
1842
      continue
1843

    
1844
    if done or oneshot:
1845
      break
1846

    
1847
    time.sleep(min(60, max_time))
1848

    
1849
  if done:
1850
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1851
  return not cumul_degraded
1852

    
1853

    
1854
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1855
  """Check that mirrors are not degraded.
1856

1857
  The ldisk parameter, if True, will change the test from the
1858
  is_degraded attribute (which represents overall non-ok status for
1859
  the device(s)) to the ldisk (representing the local storage status).
1860

1861
  """
1862
  lu.cfg.SetDiskID(dev, node)
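  # the payload of blockdev_find is a status sequence; position 5 holds
  # the overall is_degraded flag and position 6 the local-storage
  # (ldisk) status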
1863
  if ldisk:
1864
    idx = 6
1865
  else:
1866
    idx = 5
1867

    
1868
  result = True
1869
  if on_primary or dev.AssembleOnSecondary():
1870
    rstats = lu.rpc.call_blockdev_find(node, dev)
1871
    msg = rstats.fail_msg
1872
    if msg:
1873
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1874
      result = False
1875
    elif not rstats.payload:
1876
      lu.LogWarning("Can't find disk on node %s", node)
1877
      result = False
1878
    else:
1879
      result = result and (not rstats.payload[idx])
1880
  if dev.children:
1881
    for child in dev.children:
1882
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1883

    
1884
  return result
1885

    
1886

    
1887
class LUDiagnoseOS(NoHooksLU):
1888
  """Logical unit for OS diagnose/query.
1889

1890
  """
1891
  _OP_REQP = ["output_fields", "names"]
1892
  REQ_BGL = False
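  # there are no static fields for this query; all output fields are
  # computed at run time from the per-node OS scan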
1893
  _FIELDS_STATIC = utils.FieldSet()
1894
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1895

    
1896
  def ExpandNames(self):
1897
    if self.op.names:
1898
      raise errors.OpPrereqError("Selective OS query not supported")
1899

    
1900
    _CheckOutputFields(static=self._FIELDS_STATIC,
1901
                       dynamic=self._FIELDS_DYNAMIC,
1902
                       selected=self.op.output_fields)
1903

    
1904
    # Lock all nodes, in shared mode
1905
    # Temporary removal of locks, should be reverted later
1906
    # TODO: reintroduce locks when they are lighter-weight
1907
    self.needed_locks = {}
1908
    #self.share_locks[locking.LEVEL_NODE] = 1
1909
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1910

    
1911
  def CheckPrereq(self):
1912
    """Check prerequisites.
1913

1914
    """
1915

    
1916
  @staticmethod
1917
  def _DiagnoseByOS(node_list, rlist):
1918
    """Remaps a per-node return list into an a per-os per-node dictionary
1919

1920
    @param node_list: a list with the names of all nodes
1921
    @param rlist: a map with node names as keys and OS objects as values
1922

1923
    @rtype: dict
1924
    @return: a dictionary with osnames as keys and as value another map, with
1925
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
1926

1927
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
1928
                                     (/srv/..., False, "invalid api")],
1929
                           "node2": [(/srv/..., True, "")]}
1930
          }
1931

1932
    """
1933
    all_os = {}
1934
    # we build here the list of nodes that didn't fail the RPC (at RPC
1935
    # level), so that nodes with a non-responding node daemon don't
1936
    # make all OSes invalid
1937
    good_nodes = [node_name for node_name in rlist
1938
                  if not rlist[node_name].fail_msg]
1939
    for node_name, nr in rlist.items():
1940
      if nr.fail_msg or not nr.payload:
1941
        continue
1942
      for name, path, status, diagnose in nr.payload:
1943
        if name not in all_os:
1944
          # build a list of nodes for this os containing empty lists
1945
          # for each node in node_list
1946
          all_os[name] = {}
1947
          for nname in good_nodes:
1948
            all_os[name][nname] = []
1949
        all_os[name][node_name].append((path, status, diagnose))
1950
    return all_os
1951

    
1952
  def Exec(self, feedback_fn):
1953
    """Compute the list of OSes.
1954

1955
    """
1956
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1957
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1958
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1959
    output = []
1960
    for os_name, os_data in pol.items():
1961
      row = []
1962
      for field in self.op.output_fields:
1963
        if field == "name":
1964
          val = os_name
1965
        elif field == "valid":
1966
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
1967
        elif field == "node_status":
1968
          # this is just a copy of the dict
1969
          val = {}
1970
          for node_name, nos_list in os_data.items():
1971
            val[node_name] = nos_list
1972
        else:
1973
          raise errors.ParameterError(field)
1974
        row.append(val)
1975
      output.append(row)
1976

    
1977
    return output
1978

    
1979

    
1980
class LURemoveNode(LogicalUnit):
1981
  """Logical unit for removing a node.
1982

1983
  """
1984
  HPATH = "node-remove"
1985
  HTYPE = constants.HTYPE_NODE
1986
  _OP_REQP = ["node_name"]
1987

    
1988
  def BuildHooksEnv(self):
1989
    """Build hooks env.
1990

1991
    This doesn't run on the target node in the pre phase as a failed
1992
    node would then be impossible to remove.
1993

1994
    """
1995
    env = {
1996
      "OP_TARGET": self.op.node_name,
1997
      "NODE_NAME": self.op.node_name,
1998
      }
1999
    all_nodes = self.cfg.GetNodeList()
2000
    all_nodes.remove(self.op.node_name)
2001
    return env, all_nodes, all_nodes
2002

    
2003
  def CheckPrereq(self):
2004
    """Check prerequisites.
2005

2006
    This checks:
2007
     - the node exists in the configuration
2008
     - it does not have primary or secondary instances
2009
     - it's not the master
2010

2011
    Any errors are signaled by raising errors.OpPrereqError.
2012

2013
    """
2014
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2015
    if node is None:
2016
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2017

    
2018
    instance_list = self.cfg.GetInstanceList()
2019

    
2020
    masternode = self.cfg.GetMasterNode()
2021
    if node.name == masternode:
2022
      raise errors.OpPrereqError("Node is the master node,"
2023
                                 " you need to failover first.")
2024

    
2025
    for instance_name in instance_list:
2026
      instance = self.cfg.GetInstanceInfo(instance_name)
2027
      if node.name in instance.all_nodes:
2028
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2029
                                   " please remove first." % instance_name)
2030
    self.op.node_name = node.name
2031
    self.node = node
2032

    
2033
  def Exec(self, feedback_fn):
2034
    """Removes the node from the cluster.
2035

2036
    """
2037
    node = self.node
2038
    logging.info("Stopping the node daemon and removing configs from node %s",
2039
                 node.name)
2040

    
2041
    self.context.RemoveNode(node.name)
2042

    
2043
    result = self.rpc.call_node_leave_cluster(node.name)
2044
    msg = result.fail_msg
2045
    if msg:
2046
      self.LogWarning("Errors encountered on the remote node while leaving"
2047
                      " the cluster: %s", msg)
2048

    
2049
    # Promote nodes to master candidate as needed
2050
    _AdjustCandidatePool(self)
2051

    
2052

    
2053
class LUQueryNodes(NoHooksLU):
2054
  """Logical unit for querying nodes.
2055

2056
  """
2057
  _OP_REQP = ["output_fields", "names", "use_locking"]
2058
  REQ_BGL = False
2059
  _FIELDS_DYNAMIC = utils.FieldSet(
2060
    "dtotal", "dfree",
2061
    "mtotal", "mnode", "mfree",
2062
    "bootid",
2063
    "ctotal", "cnodes", "csockets",
2064
    )
2065

    
2066
  _FIELDS_STATIC = utils.FieldSet(
2067
    "name", "pinst_cnt", "sinst_cnt",
2068
    "pinst_list", "sinst_list",
2069
    "pip", "sip", "tags",
2070
    "serial_no",
2071
    "master_candidate",
2072
    "master",
2073
    "offline",
2074
    "drained",
2075
    "role",
2076
    )
2077

    
2078
  def ExpandNames(self):
2079
    _CheckOutputFields(static=self._FIELDS_STATIC,
2080
                       dynamic=self._FIELDS_DYNAMIC,
2081
                       selected=self.op.output_fields)
2082

    
2083
    self.needed_locks = {}
2084
    self.share_locks[locking.LEVEL_NODE] = 1
2085

    
2086
    if self.op.names:
2087
      self.wanted = _GetWantedNodes(self, self.op.names)
2088
    else:
2089
      self.wanted = locking.ALL_SET
2090

    
2091
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2092
    self.do_locking = self.do_node_query and self.op.use_locking
2093
    if self.do_locking:
2094
      # if we don't request only static fields, we need to lock the nodes
2095
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2096

    
2097

    
2098
  def CheckPrereq(self):
2099
    """Check prerequisites.
2100

2101
    """
2102
    # The validation of the node list is done in _GetWantedNodes if the
    # list is not empty; if it is empty, there is nothing to validate
2104
    pass
2105

    
2106
  def Exec(self, feedback_fn):
2107
    """Computes the list of nodes and their attributes.
2108

2109
    """
2110
    all_info = self.cfg.GetAllNodesInfo()
2111
    if self.do_locking:
2112
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2113
    elif self.wanted != locking.ALL_SET:
2114
      nodenames = self.wanted
2115
      missing = set(nodenames).difference(all_info.keys())
2116
      if missing:
2117
        raise errors.OpExecError(
2118
          "Some nodes were removed before retrieving their data: %s" % missing)
2119
    else:
2120
      nodenames = all_info.keys()
2121

    
2122
    nodenames = utils.NiceSort(nodenames)
2123
    nodelist = [all_info[name] for name in nodenames]
2124

    
2125
    # begin data gathering
2126

    
2127
    if self.do_node_query:
2128
      live_data = {}
2129
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2130
                                          self.cfg.GetHypervisorType())
2131
      for name in nodenames:
2132
        nodeinfo = node_data[name]
2133
        if not nodeinfo.fail_msg and nodeinfo.payload:
2134
          nodeinfo = nodeinfo.payload
2135
          fn = utils.TryConvert
2136
          live_data[name] = {
2137
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2138
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2139
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2140
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2141
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2142
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2143
            "bootid": nodeinfo.get('bootid', None),
2144
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2145
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2146
            }
2147
        else:
2148
          live_data[name] = {}
2149
    else:
2150
      live_data = dict.fromkeys(nodenames, {})
2151

    
2152
    node_to_primary = dict([(name, set()) for name in nodenames])
2153
    node_to_secondary = dict([(name, set()) for name in nodenames])
2154

    
2155
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2156
                             "sinst_cnt", "sinst_list"))
2157
    if inst_fields & frozenset(self.op.output_fields):
2158
      instancelist = self.cfg.GetInstanceList()
2159

    
2160
      for instance_name in instancelist:
2161
        inst = self.cfg.GetInstanceInfo(instance_name)
2162
        if inst.primary_node in node_to_primary:
2163
          node_to_primary[inst.primary_node].add(inst.name)
2164
        for secnode in inst.secondary_nodes:
2165
          if secnode in node_to_secondary:
2166
            node_to_secondary[secnode].add(inst.name)
2167

    
2168
    master_node = self.cfg.GetMasterNode()
2169

    
2170
    # end data gathering
2171

    
2172
    output = []
2173
    for node in nodelist:
2174
      node_output = []
2175
      for field in self.op.output_fields:
2176
        if field == "name":
2177
          val = node.name
2178
        elif field == "pinst_list":
2179
          val = list(node_to_primary[node.name])
2180
        elif field == "sinst_list":
2181
          val = list(node_to_secondary[node.name])
2182
        elif field == "pinst_cnt":
2183
          val = len(node_to_primary[node.name])
2184
        elif field == "sinst_cnt":
2185
          val = len(node_to_secondary[node.name])
2186
        elif field == "pip":
2187
          val = node.primary_ip
2188
        elif field == "sip":
2189
          val = node.secondary_ip
2190
        elif field == "tags":
2191
          val = list(node.GetTags())
2192
        elif field == "serial_no":
2193
          val = node.serial_no
2194
        elif field == "master_candidate":
2195
          val = node.master_candidate
2196
        elif field == "master":
2197
          val = node.name == master_node
2198
        elif field == "offline":
2199
          val = node.offline
2200
        elif field == "drained":
2201
          val = node.drained
2202
        elif self._FIELDS_DYNAMIC.Matches(field):
2203
          val = live_data[node.name].get(field, None)
2204
        elif field == "role":
2205
          if node.name == master_node:
2206
            val = "M"
2207
          elif node.master_candidate:
2208
            val = "C"
2209
          elif node.drained:
2210
            val = "D"
2211
          elif node.offline:
2212
            val = "O"
2213
          else:
2214
            val = "R"
2215
        else:
2216
          raise errors.ParameterError(field)
2217
        node_output.append(val)
2218
      output.append(node_output)
2219

    
2220
    return output
2221

    
2222

    
2223
class LUQueryNodeVolumes(NoHooksLU):
2224
  """Logical unit for getting volumes on node(s).
2225

2226
  """
2227
  _OP_REQP = ["nodes", "output_fields"]
2228
  REQ_BGL = False
2229
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2230
  _FIELDS_STATIC = utils.FieldSet("node")
2231

    
2232
  def ExpandNames(self):
2233
    _CheckOutputFields(static=self._FIELDS_STATIC,
2234
                       dynamic=self._FIELDS_DYNAMIC,
2235
                       selected=self.op.output_fields)
2236

    
2237
    self.needed_locks = {}
2238
    self.share_locks[locking.LEVEL_NODE] = 1
2239
    if not self.op.nodes:
2240
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2241
    else:
2242
      self.needed_locks[locking.LEVEL_NODE] = \
2243
        _GetWantedNodes(self, self.op.nodes)
2244

    
2245
  def CheckPrereq(self):
2246
    """Check prerequisites.
2247

2248
    This checks that the fields required are valid output fields.
2249

2250
    """
2251
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2252

    
2253
  def Exec(self, feedback_fn):
2254
    """Computes the list of nodes and their attributes.
2255

2256
    """
2257
    nodenames = self.nodes
2258
    volumes = self.rpc.call_node_volumes(nodenames)
2259

    
2260
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2261
             in self.cfg.GetInstanceList()]
2262

    
2263
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2264

    
2265
    output = []
2266
    for node in nodenames:
2267
      nresult = volumes[node]
2268
      if nresult.offline:
2269
        continue
2270
      msg = nresult.fail_msg
2271
      if msg:
2272
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2273
        continue
2274

    
2275
      node_vols = nresult.payload[:]
2276
      node_vols.sort(key=lambda vol: vol['dev'])
2277

    
2278
      for vol in node_vols:
2279
        node_output = []
2280
        for field in self.op.output_fields:
2281
          if field == "node":
2282
            val = node
2283
          elif field == "phys":
2284
            val = vol['dev']
2285
          elif field == "vg":
2286
            val = vol['vg']
2287
          elif field == "name":
2288
            val = vol['name']
2289
          elif field == "size":
2290
            val = int(float(vol['size']))
2291
          elif field == "instance":
2292
            for inst in ilist:
2293
              if node not in lv_by_node[inst]:
2294
                continue
2295
              if vol['name'] in lv_by_node[inst][node]:
2296
                val = inst.name
2297
                break
2298
            else:
2299
              val = '-'
2300
          else:
2301
            raise errors.ParameterError(field)
2302
          node_output.append(str(val))
2303

    
2304
        output.append(node_output)
2305

    
2306
    return output
2307

    
2308

    
2309
class LUAddNode(LogicalUnit):
2310
  """Logical unit for adding node to the cluster.
2311

2312
  """
2313
  HPATH = "node-add"
2314
  HTYPE = constants.HTYPE_NODE
2315
  _OP_REQP = ["node_name"]
2316

    
2317
  def BuildHooksEnv(self):
2318
    """Build hooks env.
2319

2320
    This will run on all nodes before, and on all nodes + the new node after.
2321

2322
    """
2323
    env = {
2324
      "OP_TARGET": self.op.node_name,
2325
      "NODE_NAME": self.op.node_name,
2326
      "NODE_PIP": self.op.primary_ip,
2327
      "NODE_SIP": self.op.secondary_ip,
2328
      }
2329
    nodes_0 = self.cfg.GetNodeList()
2330
    nodes_1 = nodes_0 + [self.op.node_name, ]
2331
    return env, nodes_0, nodes_1
2332

    
2333
  def CheckPrereq(self):
2334
    """Check prerequisites.
2335

2336
    This checks:
2337
     - the new node is not already in the config
2338
     - it is resolvable
2339
     - its parameters (single/dual homed) matches the cluster
2340

2341
    Any errors are signaled by raising errors.OpPrereqError.
2342

2343
    """
2344
    node_name = self.op.node_name
2345
    cfg = self.cfg
2346

    
2347
    dns_data = utils.HostInfo(node_name)
2348

    
2349
    node = dns_data.name
2350
    primary_ip = self.op.primary_ip = dns_data.ip
2351
    secondary_ip = getattr(self.op, "secondary_ip", None)
2352
    if secondary_ip is None:
2353
      secondary_ip = primary_ip
2354
    if not utils.IsValidIP(secondary_ip):
2355
      raise errors.OpPrereqError("Invalid secondary IP given")
2356
    self.op.secondary_ip = secondary_ip
2357

    
2358
    node_list = cfg.GetNodeList()
2359
    if not self.op.readd and node in node_list:
2360
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2361
                                 node)
2362
    elif self.op.readd and node not in node_list:
2363
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2364

    
2365
    for existing_node_name in node_list:
2366
      existing_node = cfg.GetNodeInfo(existing_node_name)
2367

    
2368
      if self.op.readd and node == existing_node_name:
2369
        if (existing_node.primary_ip != primary_ip or
2370
            existing_node.secondary_ip != secondary_ip):
2371
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2372
                                     " address configuration as before")
2373
        continue
2374

    
2375
      if (existing_node.primary_ip == primary_ip or
2376
          existing_node.secondary_ip == primary_ip or
2377
          existing_node.primary_ip == secondary_ip or
2378
          existing_node.secondary_ip == secondary_ip):
2379
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2380
                                   " existing node %s" % existing_node.name)
2381

    
2382
    # check that the type of the node (single versus dual homed) is the
2383
    # same as for the master
2384
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2385
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2386
    newbie_singlehomed = secondary_ip == primary_ip
2387
    if master_singlehomed != newbie_singlehomed:
2388
      if master_singlehomed:
2389
        raise errors.OpPrereqError("The master has no private ip but the"
2390
                                   " new node has one")
2391
      else:
2392
        raise errors.OpPrereqError("The master has a private ip but the"
2393
                                   " new node doesn't have one")
2394

    
2395
    # checks reachability
2396
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2397
      raise errors.OpPrereqError("Node not reachable by ping")
2398

    
2399
    if not newbie_singlehomed:
2400
      # check reachability from my secondary ip to newbie's secondary ip
2401
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2402
                           source=myself.secondary_ip):
2403
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2404
                                   " based ping to noded port")
2405

    
2406
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2407
    if self.op.readd:
2408
      exceptions = [node]
2409
    else:
2410
      exceptions = []
2411
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2412
    # the new node will increase mc_max with one, so:
2413
    mc_max = min(mc_max + 1, cp_size)
2414
    self.master_candidate = mc_now < mc_max
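    # auto-promote the new node to master candidate if the candidate
    # pool still has room for it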
2415

    
2416
    if self.op.readd:
2417
      self.new_node = self.cfg.GetNodeInfo(node)
2418
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2419
    else:
2420
      self.new_node = objects.Node(name=node,
2421
                                   primary_ip=primary_ip,
2422
                                   secondary_ip=secondary_ip,
2423
                                   master_candidate=self.master_candidate,
2424
                                   offline=False, drained=False)
2425

    
2426
  def Exec(self, feedback_fn):
2427
    """Adds the new node to the cluster.
2428

2429
    """
2430
    new_node = self.new_node
2431
    node = new_node.name
2432

    
2433
    # for re-adds, reset the offline/drained/master-candidate flags;
2434
    # we need to reset here, otherwise offline would prevent RPC calls
2435
    # later in the procedure; this also means that if the re-add
2436
    # fails, we are left with a non-offlined, broken node
2437
    if self.op.readd:
2438
      new_node.drained = new_node.offline = False
2439
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2440
      # if we demote the node, we do cleanup later in the procedure
2441
      new_node.master_candidate = self.master_candidate
2442

    
2443
    # notify the user about any possible mc promotion
2444
    if new_node.master_candidate:
2445
      self.LogInfo("Node will be a master candidate")
2446

    
2447
    # check connectivity
2448
    result = self.rpc.call_version([node])[node]
2449
    result.Raise("Can't get version information from node %s" % node)
2450
    if constants.PROTOCOL_VERSION == result.payload:
2451
      logging.info("Communication to node %s fine, sw version %s match",
2452
                   node, result.payload)
2453
    else:
2454
      raise errors.OpExecError("Version mismatch master version %s,"
2455
                               " node version %s" %
2456
                               (constants.PROTOCOL_VERSION, result.payload))
2457

    
2458
    # setup ssh on node
2459
    logging.info("Copy ssh key to node %s", node)
2460
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2461
    keyarray = []
2462
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2463
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2464
                priv_key, pub_key]
2465

    
2466
    for i in keyfiles:
2467
      f = open(i, 'r')
2468
      try:
2469
        keyarray.append(f.read())
2470
      finally:
2471
        f.close()
2472

    
2473
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2474
                                    keyarray[2],
2475
                                    keyarray[3], keyarray[4], keyarray[5])
2476
    result.Raise("Cannot transfer ssh keys to the new node")
2477

    
2478
    # Add node to our /etc/hosts, and add key to known_hosts
2479
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2480
      utils.AddHostToEtcHosts(new_node.name)
2481

    
2482
    if new_node.secondary_ip != new_node.primary_ip:
2483
      result = self.rpc.call_node_has_ip_address(new_node.name,
2484
                                                 new_node.secondary_ip)
2485
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2486
                   prereq=True)
2487
      if not result.payload:
2488
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2489
                                 " you gave (%s). Please fix and re-run this"
2490
                                 " command." % new_node.secondary_ip)
2491

    
2492
    node_verify_list = [self.cfg.GetMasterNode()]
2493
    node_verify_param = {
2494
      'nodelist': [node],
2495
      # TODO: do a node-net-test as well?
2496
    }
2497

    
2498
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2499
                                       self.cfg.GetClusterName())
2500
    for verifier in node_verify_list:
2501
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2502
      nl_payload = result[verifier].payload['nodelist']
2503
      if nl_payload:
2504
        for failed in nl_payload:
2505
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2506
                      (verifier, nl_payload[failed]))
2507
        raise errors.OpExecError("ssh/hostname verification failed.")
2508

    
2509
    if self.op.readd:
2510
      _RedistributeAncillaryFiles(self)
2511
      self.context.ReaddNode(new_node)
2512
      # make sure we redistribute the config
2513
      self.cfg.Update(new_node)
2514
      # and make sure the new node will not have old files around
2515
      if not new_node.master_candidate:
2516
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2517
        msg = result.RemoteFailMsg()
2518
        if msg:
2519
          self.LogWarning("Node failed to demote itself from master"
2520
                          " candidate status: %s" % msg)
2521
    else:
2522
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2523
      self.context.AddNode(new_node)
2524

    
2525

    
2526
class LUSetNodeParams(LogicalUnit):
2527
  """Modifies the parameters of a node.
2528

2529
  """
2530
  HPATH = "node-modify"
2531
  HTYPE = constants.HTYPE_NODE
2532
  _OP_REQP = ["node_name"]
2533
  REQ_BGL = False
2534

    
2535
  def CheckArguments(self):
2536
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2537
    if node_name is None:
2538
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2539
    self.op.node_name = node_name
2540
    _CheckBooleanOpField(self.op, 'master_candidate')
2541
    _CheckBooleanOpField(self.op, 'offline')
2542
    _CheckBooleanOpField(self.op, 'drained')
2543
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2544
    if all_mods.count(None) == 3:
2545
      raise errors.OpPrereqError("Please pass at least one modification")
2546
    if all_mods.count(True) > 1:
2547
      raise errors.OpPrereqError("Can't set the node into more than one"
2548
                                 " state at the same time")
2549

    
2550
  def ExpandNames(self):
2551
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2552

    
2553
  def BuildHooksEnv(self):
2554
    """Build hooks env.
2555

2556
    This runs on the master node.
2557

2558
    """
2559
    env = {
2560
      "OP_TARGET": self.op.node_name,
2561
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2562
      "OFFLINE": str(self.op.offline),
2563
      "DRAINED": str(self.op.drained),
2564
      }
2565
    nl = [self.cfg.GetMasterNode(),
2566
          self.op.node_name]
2567
    return env, nl, nl
2568

    
2569
  def CheckPrereq(self):
2570
    """Check prerequisites.
2571

2572
    This only checks the instance list against the existing names.
2573

2574
    """
2575
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2576

    
2577
    if ((self.op.master_candidate == False or self.op.offline == True or
2578
         self.op.drained == True) and node.master_candidate):
2579
      # we will demote the node from master_candidate
2580
      if self.op.node_name == self.cfg.GetMasterNode():
2581
        raise errors.OpPrereqError("The master node has to be a"
2582
                                   " master candidate, online and not drained")
2583
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2584
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2585
      if num_candidates <= cp_size:
2586
        msg = ("Not enough master candidates (desired"
2587
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2588
        if self.op.force:
2589
          self.LogWarning(msg)
2590
        else:
2591
          raise errors.OpPrereqError(msg)
2592

    
2593
    if (self.op.master_candidate == True and
2594
        ((node.offline and not self.op.offline == False) or
2595
         (node.drained and not self.op.drained == False))):
2596
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2597
                                 " to master_candidate" % node.name)
2598

    
2599
    return
2600

    
2601
  def Exec(self, feedback_fn):
2602
    """Modifies a node.
2603

2604
    """
2605
    node = self.node
2606

    
2607
    result = []
2608
    changed_mc = False
2609

    
2610
    if self.op.offline is not None:
2611
      node.offline = self.op.offline
2612
      result.append(("offline", str(self.op.offline)))
2613
      if self.op.offline == True:
2614
        if node.master_candidate:
2615
          node.master_candidate = False
2616
          changed_mc = True
2617
          result.append(("master_candidate", "auto-demotion due to offline"))
2618
        if node.drained:
2619
          node.drained = False
2620
          result.append(("drained", "clear drained status due to offline"))
2621

    
2622
    if self.op.master_candidate is not None:
2623
      node.master_candidate = self.op.master_candidate
2624
      changed_mc = True
2625
      result.append(("master_candidate", str(self.op.master_candidate)))
2626
      if self.op.master_candidate == False:
2627
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2628
        msg = rrc.fail_msg
2629
        if msg:
2630
          self.LogWarning("Node failed to demote itself: %s" % msg)
2631

    
2632
    if self.op.drained is not None:
2633
      node.drained = self.op.drained
2634
      result.append(("drained", str(self.op.drained)))
2635
      if self.op.drained == True:
2636
        if node.master_candidate:
2637
          node.master_candidate = False
2638
          changed_mc = True
2639
          result.append(("master_candidate", "auto-demotion due to drain"))
2640
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2641
          msg = rrc.RemoteFailMsg()
2642
          if msg:
2643
            self.LogWarning("Node failed to demote itself: %s" % msg)
2644
        if node.offline:
2645
          node.offline = False
2646
          result.append(("offline", "clear offline status due to drain"))
2647

    
2648
    # this will trigger configuration file update, if needed
2649
    self.cfg.Update(node)
2650
    # this will trigger job queue propagation or cleanup
2651
    if changed_mc:
2652
      self.context.ReaddNode(node)
2653

    
2654
    return result
2655

    
2656

    
2657
class LUPowercycleNode(NoHooksLU):
2658
  """Powercycles a node.
2659

2660
  """
2661
  _OP_REQP = ["node_name", "force"]
2662
  REQ_BGL = False
2663

    
2664
  def CheckArguments(self):
2665
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2666
    if node_name is None:
2667
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2668
    self.op.node_name = node_name
2669
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2670
      raise errors.OpPrereqError("The node is the master and the force"
2671
                                 " parameter was not set")
2672

    
2673
  def ExpandNames(self):
2674
    """Locking for PowercycleNode.
2675

2676
    This is a last-resource option and shouldn't block on other
2677
    jobs. Therefore, we grab no locks.
2678

2679
    """
2680
    self.needed_locks = {}
2681

    
2682
  def CheckPrereq(self):
2683
    """Check prerequisites.
2684

2685
    This LU has no prereqs.
2686

2687
    """
2688
    pass
2689

    
2690
  def Exec(self, feedback_fn):
2691
    """Reboots a node.
2692

2693
    """
2694
    result = self.rpc.call_node_powercycle(self.op.node_name,
2695
                                           self.cfg.GetHypervisorType())
2696
    result.Raise("Failed to schedule the reboot")
2697
    return result.payload
2698

    
2699

    
2700
class LUQueryClusterInfo(NoHooksLU):
2701
  """Query cluster configuration.
2702

2703
  """
2704
  _OP_REQP = []
2705
  REQ_BGL = False
2706

    
2707
  def ExpandNames(self):
2708
    self.needed_locks = {}
2709

    
2710
  def CheckPrereq(self):
2711
    """No prerequsites needed for this LU.
2712

2713
    """
2714
    pass
2715

    
2716
  def Exec(self, feedback_fn):
2717
    """Return cluster config.
2718

2719
    """
2720
    cluster = self.cfg.GetClusterInfo()
2721
    result = {
2722
      "software_version": constants.RELEASE_VERSION,
2723
      "protocol_version": constants.PROTOCOL_VERSION,
2724
      "config_version": constants.CONFIG_VERSION,
2725
      "os_api_version": max(constants.OS_API_VERSIONS),
2726
      "export_version": constants.EXPORT_VERSION,
2727
      "architecture": (platform.architecture()[0], platform.machine()),
2728
      "name": cluster.cluster_name,
2729
      "master": cluster.master_node,
2730
      "default_hypervisor": cluster.enabled_hypervisors[0],
2731
      "enabled_hypervisors": cluster.enabled_hypervisors,
2732
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
2733
                        for hypervisor_name in cluster.enabled_hypervisors]),
2734
      "beparams": cluster.beparams,
2735
      "nicparams": cluster.nicparams,
2736
      "candidate_pool_size": cluster.candidate_pool_size,
2737
      "master_netdev": cluster.master_netdev,
2738
      "volume_group_name": cluster.volume_group_name,
2739
      "file_storage_dir": cluster.file_storage_dir,
2740
      }
2741

    
2742
    return result
2743

    
2744

    
2745
class LUQueryConfigValues(NoHooksLU):
2746
  """Return configuration values.
2747

2748
  """
2749
  _OP_REQP = []
2750
  REQ_BGL = False
2751
  _FIELDS_DYNAMIC = utils.FieldSet()
2752
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2753

    
2754
  def ExpandNames(self):
2755
    self.needed_locks = {}
2756

    
2757
    _CheckOutputFields(static=self._FIELDS_STATIC,
2758
                       dynamic=self._FIELDS_DYNAMIC,
2759
                       selected=self.op.output_fields)
2760

    
2761
  def CheckPrereq(self):
2762
    """No prerequisites.
2763

2764
    """
2765
    pass
2766

    
2767
  def Exec(self, feedback_fn):
2768
    """Dump a representation of the cluster config to the standard output.
2769

2770
    """
2771
    values = []
2772
    for field in self.op.output_fields:
2773
      if field == "cluster_name":
2774
        entry = self.cfg.GetClusterName()
2775
      elif field == "master_node":
2776
        entry = self.cfg.GetMasterNode()
2777
      elif field == "drain_flag":
2778
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2779
      else:
2780
        raise errors.ParameterError(field)
2781
      values.append(entry)
2782
    return values
2783

    
2784

    
2785
class LUActivateInstanceDisks(NoHooksLU):
2786
  """Bring up an instance's disks.
2787

2788
  """
2789
  _OP_REQP = ["instance_name"]
2790
  REQ_BGL = False
2791

    
2792
  def ExpandNames(self):
2793
    self._ExpandAndLockInstance()
2794
    self.needed_locks[locking.LEVEL_NODE] = []
2795
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
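    # the node locks are computed later, in DeclareLocks, from the
    # instance's primary and secondary nodes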
2796

    
2797
  def DeclareLocks(self, level):
2798
    if level == locking.LEVEL_NODE:
2799
      self._LockInstancesNodes()
2800

    
2801
  def CheckPrereq(self):
2802
    """Check prerequisites.
2803

2804
    This checks that the instance is in the cluster.
2805

2806
    """
2807
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2808
    assert self.instance is not None, \
2809
      "Cannot retrieve locked instance %s" % self.op.instance_name
2810
    _CheckNodeOnline(self, self.instance.primary_node)
2811

    
2812
  def Exec(self, feedback_fn):
2813
    """Activate the disks.
2814

2815
    """
2816
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2817
    if not disks_ok:
2818
      raise errors.OpExecError("Cannot activate block devices")
2819

    
2820
    return disks_info
2821

    
2822

    
2823
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2824
  """Prepare the block devices for an instance.
2825

2826
  This sets up the block devices on all nodes.
2827

2828
  @type lu: L{LogicalUnit}
2829
  @param lu: the logical unit on whose behalf we execute
2830
  @type instance: L{objects.Instance}
2831
  @param instance: the instance for whose disks we assemble
2832
  @type ignore_secondaries: boolean
2833
  @param ignore_secondaries: if true, errors on secondary nodes
2834
      won't result in an error return from the function
2835
  @return: a tuple of (disks_ok, device_info), where device_info is a
      list of (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices
2838

2839
  """
2840
  device_info = []
2841
  disks_ok = True
2842
  iname = instance.name
2843
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before the handshake has occurred, but we do not eliminate it
2846

    
2847
  # The proper fix would be to wait (with some limits) until the
2848
  # connection has been made and drbd transitions from WFConnection
2849
  # into any other network-connected state (Connected, SyncTarget,
2850
  # SyncSource, etc.)
2851

    
2852
  # 1st pass, assemble on all nodes in secondary mode
2853
  for inst_disk in instance.disks:
2854
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2855
      lu.cfg.SetDiskID(node_disk, node)
2856
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2857
      msg = result.fail_msg
2858
      if msg:
2859
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2860
                           " (is_primary=False, pass=1): %s",
2861
                           inst_disk.iv_name, node, msg)
2862
        if not ignore_secondaries:
2863
          disks_ok = False
2864

    
2865
  # FIXME: race condition on drbd migration to primary
2866

    
2867
  # 2nd pass, do only the primary node
2868
  for inst_disk in instance.disks:
2869
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2870
      if node != instance.primary_node:
2871
        continue
2872
      lu.cfg.SetDiskID(node_disk, node)
2873
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2874
      msg = result.fail_msg
2875
      if msg:
2876
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2877
                           " (is_primary=True, pass=2): %s",
2878
                           inst_disk.iv_name, node, msg)
2879
        disks_ok = False
2880
    device_info.append((instance.primary_node, inst_disk.iv_name,
2881
                        result.payload))
2882

    
2883
  # leave the disks configured for the primary node
2884
  # this is a workaround that would be fixed better by
2885
  # improving the logical/physical id handling
2886
  for disk in instance.disks:
2887
    lu.cfg.SetDiskID(disk, instance.primary_node)
2888

    
2889
  return disks_ok, device_info
2890

    
2891

    
2892
def _StartInstanceDisks(lu, instance, force):
2893
  """Start the disks of an instance.
2894

2895
  """
2896
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
2897
                                           ignore_secondaries=force)
2898
  if not disks_ok:
2899
    _ShutdownInstanceDisks(lu, instance)
2900
    if force is not None and not force:
2901
      lu.proc.LogWarning("", hint="If the message above refers to a"
2902
                         " secondary node,"
2903
                         " you can retry the operation using '--force'.")
2904
    raise errors.OpExecError("Disk consistency error")
2905

    
2906

    
2907
class LUDeactivateInstanceDisks(NoHooksLU):
2908
  """Shutdown an instance's disks.
2909

2910
  """
2911
  _OP_REQP = ["instance_name"]
2912
  REQ_BGL = False
2913

    
2914
  def ExpandNames(self):
2915
    self._ExpandAndLockInstance()
2916
    self.needed_locks[locking.LEVEL_NODE] = []
2917
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2918

    
2919
  def DeclareLocks(self, level):
2920
    if level == locking.LEVEL_NODE:
2921
      self._LockInstancesNodes()
2922

    
2923
  def CheckPrereq(self):
2924
    """Check prerequisites.
2925

2926
    This checks that the instance is in the cluster.
2927

2928
    """
2929
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2930
    assert self.instance is not None, \
2931
      "Cannot retrieve locked instance %s" % self.op.instance_name
2932

    
2933
  def Exec(self, feedback_fn):
2934
    """Deactivate the disks
2935

2936
    """
2937
    instance = self.instance
2938
    _SafeShutdownInstanceDisks(self, instance)
2939

    
2940

    
2941
def _SafeShutdownInstanceDisks(lu, instance):
2942
  """Shutdown block devices of an instance.
2943

2944
  This function checks if an instance is running, before calling
2945
  _ShutdownInstanceDisks.
2946

2947
  """
2948
  pnode = instance.primary_node
2949
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
2950
  ins_l.Raise("Can't contact node %s" % pnode)
2951

    
2952
  if instance.name in ins_l.payload:
2953
    raise errors.OpExecError("Instance is running, can't shutdown"
2954
                             " block devices.")
2955

    
2956
  _ShutdownInstanceDisks(lu, instance)
2957

    
2958

    
2959
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2960
  """Shutdown block devices of an instance.
2961

2962
  This does the shutdown on all nodes of the instance.
2963

2964
  If the ignore_primary is false, errors on the primary node are
2965
  ignored.
2966

2967
  """
2968
  all_result = True
2969
  for disk in instance.disks:
2970
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2971
      lu.cfg.SetDiskID(top_disk, node)
2972
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
2973
      msg = result.fail_msg
2974
      if msg:
2975
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
2976
                      disk.iv_name, node, msg)
2977
        if not ignore_primary or node != instance.primary_node:
2978
          all_result = False
2979
  return all_result
2980

    
2981

    
2982
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2983
  """Checks if a node has enough free memory.
2984

2985
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
2989

2990
  @type lu: C{LogicalUnit}
2991
  @param lu: a logical unit from which we get configuration data
2992
  @type node: C{str}
2993
  @param node: the node to check
2994
  @type reason: C{str}
2995
  @param reason: string to use in the error message
2996
  @type requested: C{int}
2997
  @param requested: the amount of memory in MiB to check for
2998
  @type hypervisor_name: C{str}
2999
  @param hypervisor_name: the hypervisor to ask for memory stats
3000
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3001
      we cannot check the node
3002

3003
  """
3004
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3005
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3006
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3007
  if not isinstance(free_mem, int):
3008
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3009
                               " was '%s'" % (node, free_mem))
3010
  if requested > free_mem:
3011
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3012
                               " needed %s MiB, available %s MiB" %
3013
                               (node, reason, requested, free_mem))
3014

    
3015

    
3016
class LUStartupInstance(LogicalUnit):
3017
  """Starts an instance.
3018

3019
  """
3020
  HPATH = "instance-start"
3021
  HTYPE = constants.HTYPE_INSTANCE
3022
  _OP_REQP = ["instance_name", "force"]
3023
  REQ_BGL = False
3024

    
3025
  def ExpandNames(self):
3026
    self._ExpandAndLockInstance()
3027

    
3028
  def BuildHooksEnv(self):
3029
    """Build hooks env.
3030

3031
    This runs on master, primary and secondary nodes of the instance.
3032

3033
    """
3034
    env = {
3035
      "FORCE": self.op.force,
3036
      }
3037
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3038
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3039
    return env, nl, nl
3040

    
3041
  def CheckPrereq(self):
3042
    """Check prerequisites.
3043

3044
    This checks that the instance is in the cluster.
3045

3046
    """
3047
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3048
    assert self.instance is not None, \
3049
      "Cannot retrieve locked instance %s" % self.op.instance_name
3050

    
3051
    # extra beparams
3052
    self.beparams = getattr(self.op, "beparams", {})
3053
    if self.beparams:
3054
      if not isinstance(self.beparams, dict):
3055
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3056
                                   " dict" % (type(self.beparams), ))
3057
      # fill the beparams dict
3058
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3059
      self.op.beparams = self.beparams
3060

    
3061
    # extra hvparams
3062
    self.hvparams = getattr(self.op, "hvparams", {})
3063
    if self.hvparams:
3064
      if not isinstance(self.hvparams, dict):
3065
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3066
                                   " dict" % (type(self.hvparams), ))
3067

    
3068
      # check hypervisor parameter syntax (locally)
3069
      cluster = self.cfg.GetClusterInfo()
3070
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
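      # parameter precedence: cluster defaults for this hypervisor first,
      # then the instance's own hvparams, then the per-start overrides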
3071
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3072
                                    instance.hvparams)
3073
      filled_hvp.update(self.hvparams)
3074
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3075
      hv_type.CheckParameterSyntax(filled_hvp)
3076
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3077
      self.op.hvparams = self.hvparams
3078

    
3079
    _CheckNodeOnline(self, instance.primary_node)
3080

    
3081
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3082
    # check bridges existence
3083
    _CheckInstanceBridgesExist(self, instance)
3084

    
3085
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3086
                                              instance.name,
3087
                                              instance.hypervisor)
3088
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3089
                      prereq=True)
3090
    if not remote_info.payload: # not running already
3091
      _CheckNodeFreeMemory(self, instance.primary_node,
3092
                           "starting instance %s" % instance.name,
3093
                           bep[constants.BE_MEMORY], instance.hypervisor)
3094

    
3095
  def Exec(self, feedback_fn):
3096
    """Start the instance.
3097

3098
    """
3099
    instance = self.instance
3100
    force = self.op.force
3101

    
3102
    self.cfg.MarkInstanceUp(instance.name)
3103

    
3104
    node_current = instance.primary_node
3105

    
3106
    _StartInstanceDisks(self, instance, force)
3107

    
3108
    result = self.rpc.call_instance_start(node_current, instance,
3109
                                          self.hvparams, self.beparams)
3110
    msg = result.fail_msg
3111
    if msg:
3112
      _ShutdownInstanceDisks(self, instance)
3113
      raise errors.OpExecError("Could not start instance: %s" % msg)
3114

    
3115

    
3116
class LURebootInstance(LogicalUnit):
3117
  """Reboot an instance.
3118

3119
  """
3120
  HPATH = "instance-reboot"
3121
  HTYPE = constants.HTYPE_INSTANCE
3122
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3123
  REQ_BGL = False
3124

    
3125
  def ExpandNames(self):
3126
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3127
                                   constants.INSTANCE_REBOOT_HARD,
3128
                                   constants.INSTANCE_REBOOT_FULL]:
3129
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3130
                                  (constants.INSTANCE_REBOOT_SOFT,
3131
                                   constants.INSTANCE_REBOOT_HARD,
3132
                                   constants.INSTANCE_REBOOT_FULL))
3133
    self._ExpandAndLockInstance()
3134

    
3135
  def BuildHooksEnv(self):
3136
    """Build hooks env.
3137

3138
    This runs on master, primary and secondary nodes of the instance.
3139

3140
    """
3141
    env = {
3142
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3143
      "REBOOT_TYPE": self.op.reboot_type,
3144
      }
3145
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3146
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3147
    return env, nl, nl
3148

    
3149
  def CheckPrereq(self):
3150
    """Check prerequisites.
3151

3152
    This checks that the instance is in the cluster.
3153

3154
    """
3155
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3156
    assert self.instance is not None, \
3157
      "Cannot retrieve locked instance %s" % self.op.instance_name
3158

    
3159
    _CheckNodeOnline(self, instance.primary_node)
3160

    
3161
    # check bridges existence
3162
    _CheckInstanceBridgesExist(self, instance)
3163

    
3164
  def Exec(self, feedback_fn):
3165
    """Reboot the instance.
3166

3167
    """
3168
    instance = self.instance
3169
    ignore_secondaries = self.op.ignore_secondaries
3170
    reboot_type = self.op.reboot_type
3171

    
3172
    node_current = instance.primary_node
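    # soft and hard reboots are delegated to the hypervisor on the node; a
    # full reboot is emulated below as shutdown, disk restart and start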
3173

    
3174
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3175
                       constants.INSTANCE_REBOOT_HARD]:
3176
      for disk in instance.disks:
3177
        self.cfg.SetDiskID(disk, node_current)
3178
      result = self.rpc.call_instance_reboot(node_current, instance,
3179
                                             reboot_type)
3180
      result.Raise("Could not reboot instance")
3181
    else:
3182
      result = self.rpc.call_instance_shutdown(node_current, instance)
3183
      result.Raise("Could not shutdown instance for full reboot")
3184
      _ShutdownInstanceDisks(self, instance)
3185
      _StartInstanceDisks(self, instance, ignore_secondaries)
3186
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3187
      msg = result.fail_msg
3188
      if msg:
3189
        _ShutdownInstanceDisks(self, instance)
3190
        raise errors.OpExecError("Could not start instance for"
3191
                                 " full reboot: %s" % msg)
3192

    
3193
    self.cfg.MarkInstanceUp(instance.name)
3194

    
3195

    
3196
class LUShutdownInstance(LogicalUnit):
3197
  """Shutdown an instance.
3198

3199
  """
3200
  HPATH = "instance-stop"
3201
  HTYPE = constants.HTYPE_INSTANCE
3202
  _OP_REQP = ["instance_name"]
3203
  REQ_BGL = False
3204

    
3205
  def ExpandNames(self):
3206
    self._ExpandAndLockInstance()
3207

    
3208
  def BuildHooksEnv(self):
3209
    """Build hooks env.
3210

3211
    This runs on master, primary and secondary nodes of the instance.
3212

3213
    """
3214
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3215
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3216
    return env, nl, nl
3217

    
3218
  def CheckPrereq(self):
3219
    """Check prerequisites.
3220

3221
    This checks that the instance is in the cluster.
3222

3223
    """
3224
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3225
    assert self.instance is not None, \
3226
      "Cannot retrieve locked instance %s" % self.op.instance_name
3227
    _CheckNodeOnline(self, self.instance.primary_node)
3228

    
3229
  def Exec(self, feedback_fn):
3230
    """Shutdown the instance.
3231

3232
    """
3233
    instance = self.instance
3234
    node_current = instance.primary_node
3235
    self.cfg.MarkInstanceDown(instance.name)
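    # the instance is marked down in the config before contacting the node,
    # so even a failed shutdown leaves it administratively down (we only warn)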
3236
    result = self.rpc.call_instance_shutdown(node_current, instance)
3237
    msg = result.fail_msg
3238
    if msg:
3239
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3240

    
3241
    _ShutdownInstanceDisks(self, instance)
3242

    
3243

    
3244
class LUReinstallInstance(LogicalUnit):
3245
  """Reinstall an instance.
3246

3247
  """
3248
  HPATH = "instance-reinstall"
3249
  HTYPE = constants.HTYPE_INSTANCE
3250
  _OP_REQP = ["instance_name"]
3251
  REQ_BGL = False
3252

    
3253
  def ExpandNames(self):
3254
    self._ExpandAndLockInstance()
3255

    
3256
  def BuildHooksEnv(self):
3257
    """Build hooks env.
3258

3259
    This runs on master, primary and secondary nodes of the instance.
3260

3261
    """
3262
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3263
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3264
    return env, nl, nl
3265

    
3266
  def CheckPrereq(self):
3267
    """Check prerequisites.
3268

3269
    This checks that the instance is in the cluster and is not running.
3270

3271
    """
3272
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3273
    assert instance is not None, \
3274
      "Cannot retrieve locked instance %s" % self.op.instance_name
3275
    _CheckNodeOnline(self, instance.primary_node)
3276

    
3277
    if instance.disk_template == constants.DT_DISKLESS:
3278
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3279
                                 self.op.instance_name)
3280
    if instance.admin_up:
3281
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3282
                                 self.op.instance_name)
3283
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3284
                                              instance.name,
3285
                                              instance.hypervisor)
3286
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3287
                      prereq=True)
3288
    if remote_info.payload:
3289
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3290
                                 (self.op.instance_name,
3291
                                  instance.primary_node))
3292

    
3293
    self.op.os_type = getattr(self.op, "os_type", None)
3294
    if self.op.os_type is not None:
3295
      # OS verification
3296
      pnode = self.cfg.GetNodeInfo(
3297
        self.cfg.ExpandNodeName(instance.primary_node))
3298
      if pnode is None:
3299
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3300
                                   instance.primary_node)
3301
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3302
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3303
                   (self.op.os_type, pnode.name), prereq=True)
3304

    
3305
    self.instance = instance
3306

    
3307
  def Exec(self, feedback_fn):
3308
    """Reinstall the instance.
3309

3310
    """
3311
    inst = self.instance
3312

    
3313
    if self.op.os_type is not None:
3314
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3315
      inst.os = self.op.os_type
3316
      self.cfg.Update(inst)
3317

    
3318
    _StartInstanceDisks(self, inst, None)
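    # the disks must be active for the OS create scripts; they are
    # deactivated again in the finally clause below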
3319
    try:
3320
      feedback_fn("Running the instance OS create scripts...")
3321
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3322
      result.Raise("Could not install OS for instance %s on node %s" %
3323
                   (inst.name, inst.primary_node))
3324
    finally:
3325
      _ShutdownInstanceDisks(self, inst)
3326

    
3327

    
3328
class LURenameInstance(LogicalUnit):
3329
  """Rename an instance.
3330

3331
  """
3332
  HPATH = "instance-rename"
3333
  HTYPE = constants.HTYPE_INSTANCE
3334
  _OP_REQP = ["instance_name", "new_name"]
3335

    
3336
  def BuildHooksEnv(self):
3337
    """Build hooks env.
3338

3339
    This runs on master, primary and secondary nodes of the instance.
3340

3341
    """
3342
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3343
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3344
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3345
    return env, nl, nl
3346

    
3347
  def CheckPrereq(self):
3348
    """Check prerequisites.
3349

3350
    This checks that the instance is in the cluster and is not running.
3351

3352
    """
3353
    instance = self.cfg.GetInstanceInfo(
3354
      self.cfg.ExpandInstanceName(self.op.instance_name))
3355
    if instance is None:
3356
      raise errors.OpPrereqError("Instance '%s' not known" %
3357
                                 self.op.instance_name)
3358
    _CheckNodeOnline(self, instance.primary_node)
3359

    
3360
    if instance.admin_up:
3361
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3362
                                 self.op.instance_name)
3363
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3364
                                              instance.name,
3365
                                              instance.hypervisor)
3366
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3367
                      prereq=True)
3368
    if remote_info.payload:
3369
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3370
                                 (self.op.instance_name,
3371
                                  instance.primary_node))
3372
    self.instance = instance
3373

    
3374
    # new name verification
3375
    name_info = utils.HostInfo(self.op.new_name)
3376

    
3377
    self.op.new_name = new_name = name_info.name
3378
    instance_list = self.cfg.GetInstanceList()
3379
    if new_name in instance_list:
3380
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3381
                                 new_name)
3382

    
3383
    if not getattr(self.op, "ignore_ip", False):
3384
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3385
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3386
                                   (name_info.ip, new_name))
3387

    
3388

    
3389
  def Exec(self, feedback_fn):
3390
    """Reinstall the instance.
3391

3392
    """
3393
    inst = self.instance
3394
    old_name = inst.name
3395

    
3396
    if inst.disk_template == constants.DT_FILE:
3397
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3398

    
3399
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3400
    # Change the instance lock. This is definitely safe while we hold the BGL
3401
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3402
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3403

    
3404
    # re-read the instance from the configuration after rename
3405
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3406

    
3407
    if inst.disk_template == constants.DT_FILE:
3408
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3409
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3410
                                                     old_file_storage_dir,
3411
                                                     new_file_storage_dir)
3412
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3413
                   " (but the instance has been renamed in Ganeti)" %
3414
                   (inst.primary_node, old_file_storage_dir,
3415
                    new_file_storage_dir))
3416

    
3417
    _StartInstanceDisks(self, inst, None)
3418
    try:
3419
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3420
                                                 old_name)
3421
      msg = result.fail_msg
3422
      if msg:
3423
        msg = ("Could not run OS rename script for instance %s on node %s"
3424
               " (but the instance has been renamed in Ganeti): %s" %
3425
               (inst.name, inst.primary_node, msg))
3426
        self.proc.LogWarning(msg)
3427
    finally:
3428
      _ShutdownInstanceDisks(self, inst)
3429

    
3430

    
3431
class LURemoveInstance(LogicalUnit):
3432
  """Remove an instance.
3433

3434
  """
3435
  HPATH = "instance-remove"
3436
  HTYPE = constants.HTYPE_INSTANCE
3437
  _OP_REQP = ["instance_name", "ignore_failures"]
3438
  REQ_BGL = False
3439

    
3440
  def ExpandNames(self):
3441
    self._ExpandAndLockInstance()
3442
    self.needed_locks[locking.LEVEL_NODE] = []
3443
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3444

    
3445
  def DeclareLocks(self, level):
3446
    if level == locking.LEVEL_NODE:
3447
      self._LockInstancesNodes()
3448

    
3449
  def BuildHooksEnv(self):
3450
    """Build hooks env.
3451

3452
    This runs on master, primary and secondary nodes of the instance.
3453

3454
    """
3455
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3456
    nl = [self.cfg.GetMasterNode()]
3457
    return env, nl, nl
3458

    
3459
  def CheckPrereq(self):
3460
    """Check prerequisites.
3461

3462
    This checks that the instance is in the cluster.
3463

3464
    """
3465
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3466
    assert self.instance is not None, \
3467
      "Cannot retrieve locked instance %s" % self.op.instance_name
3468

    
3469
  def Exec(self, feedback_fn):
3470
    """Remove the instance.
3471

3472
    """
3473
    instance = self.instance
3474
    logging.info("Shutting down instance %s on node %s",
3475
                 instance.name, instance.primary_node)
3476

    
3477
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3478
    msg = result.fail_msg
3479
    if msg:
3480
      if self.op.ignore_failures:
3481
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3482
      else:
3483
        raise errors.OpExecError("Could not shutdown instance %s on"
3484
                                 " node %s: %s" %
3485
                                 (instance.name, instance.primary_node, msg))
3486

    
3487
    logging.info("Removing block devices for instance %s", instance.name)
3488

    
3489
    if not _RemoveDisks(self, instance):
3490
      if self.op.ignore_failures:
3491
        feedback_fn("Warning: can't remove instance's disks")
3492
      else:
3493
        raise errors.OpExecError("Can't remove instance's disks")
3494

    
3495
    logging.info("Removing instance %s out of cluster config", instance.name)
3496

    
3497
    self.cfg.RemoveInstance(instance.name)
3498
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3499

    
3500

    
3501
class LUQueryInstances(NoHooksLU):
3502
  """Logical unit for querying instances.
3503

3504
  """
3505
  _OP_REQP = ["output_fields", "names", "use_locking"]
3506
  REQ_BGL = False
3507
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3508
                                    "admin_state",
3509
                                    "disk_template", "ip", "mac", "bridge",
3510
                                    "nic_mode", "nic_link",
3511
                                    "sda_size", "sdb_size", "vcpus", "tags",
3512
                                    "network_port", "beparams",
3513
                                    r"(disk)\.(size)/([0-9]+)",
3514
                                    r"(disk)\.(sizes)", "disk_usage",
3515
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3516
                                    r"(nic)\.(bridge)/([0-9]+)",
3517
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
3518
                                    r"(disk|nic)\.(count)",
3519
                                    "serial_no", "hypervisor", "hvparams",] +
3520
                                  ["hv/%s" % name
3521
                                   for name in constants.HVS_PARAMETERS] +
3522
                                  ["be/%s" % name
3523
                                   for name in constants.BES_PARAMETERS])
3524
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3525

    
3526

    
3527
  def ExpandNames(self):
3528
    _CheckOutputFields(static=self._FIELDS_STATIC,
3529
                       dynamic=self._FIELDS_DYNAMIC,
3530
                       selected=self.op.output_fields)
3531

    
3532
    self.needed_locks = {}
3533
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3534
    self.share_locks[locking.LEVEL_NODE] = 1
3535

    
3536
    if self.op.names:
3537
      self.wanted = _GetWantedInstances(self, self.op.names)
3538
    else:
3539
      self.wanted = locking.ALL_SET
3540

    
3541
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3542
    self.do_locking = self.do_node_query and self.op.use_locking
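    # node RPCs are needed only for the dynamic fields, and locks are taken
    # only when the caller requested locking; static data comes from the config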
3543
    if self.do_locking:
3544
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3545
      self.needed_locks[locking.LEVEL_NODE] = []
3546
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3547

    
3548
  def DeclareLocks(self, level):
3549
    if level == locking.LEVEL_NODE and self.do_locking:
3550
      self._LockInstancesNodes()
3551

    
3552
  def CheckPrereq(self):
3553
    """Check prerequisites.
3554

3555
    """
3556
    pass
3557

    
3558
  def Exec(self, feedback_fn):
3559
    """Computes the list of nodes and their attributes.
3560

3561
    """
3562
    all_info = self.cfg.GetAllInstancesInfo()
3563
    if self.wanted == locking.ALL_SET:
3564
      # caller didn't specify instance names, so ordering is not important
3565
      if self.do_locking:
3566
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3567
      else:
3568
        instance_names = all_info.keys()
3569
      instance_names = utils.NiceSort(instance_names)
3570
    else:
3571
      # caller did specify names, so we must keep the ordering
3572
      if self.do_locking:
3573
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3574
      else:
3575
        tgt_set = all_info.keys()
3576
      missing = set(self.wanted).difference(tgt_set)
3577
      if missing:
3578
        raise errors.OpExecError("Some instances were removed before"
3579
                                 " retrieving their data: %s" % missing)
3580
      instance_names = self.wanted
3581

    
3582
    instance_list = [all_info[iname] for iname in instance_names]
3583

    
3584
    # begin data gathering
3585

    
3586
    nodes = frozenset([inst.primary_node for inst in instance_list])
3587
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
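    # live data only needs the set of primary nodes and the hypervisors
    # actually used by the selected instances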
3588

    
3589
    bad_nodes = []
3590
    off_nodes = []
3591
    if self.do_node_query:
3592
      live_data = {}
3593
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3594
      for name in nodes:
3595
        result = node_data[name]
3596
        if result.offline:
3597
          # offline nodes will be in both lists
3598
          off_nodes.append(name)
3599
        if result.failed or result.fail_msg:
3600
          bad_nodes.append(name)
3601
        else:
3602
          if result.payload:
3603
            live_data.update(result.payload)
3604
          # else no instance is alive
3605
    else:
3606
      live_data = dict([(name, {}) for name in instance_names])
3607

    
3608
    # end data gathering
3609

    
3610
    HVPREFIX = "hv/"
3611
    BEPREFIX = "be/"
3612
    output = []
3613
    cluster = self.cfg.GetClusterInfo()
3614
    for instance in instance_list:
3615
      iout = []
3616
      i_hv = cluster.FillHV(instance)
3617
      i_be = cluster.FillBE(instance)
3618
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
3619
                                 nic.nicparams) for nic in instance.nics]
3620
      for field in self.op.output_fields:
3621
        st_match = self._FIELDS_STATIC.Matches(field)
3622
        if field == "name":
3623
          val = instance.name
3624
        elif field == "os":
3625
          val = instance.os
3626
        elif field == "pnode":
3627
          val = instance.primary_node
3628
        elif field == "snodes":
3629
          val = list(instance.secondary_nodes)
3630
        elif field == "admin_state":
3631
          val = instance.admin_up
3632
        elif field == "oper_state":
3633
          if instance.primary_node in bad_nodes:
3634
            val = None
3635
          else:
3636
            val = bool(live_data.get(instance.name))
3637
        elif field == "status":
3638
          if instance.primary_node in off_nodes:
3639
            val = "ERROR_nodeoffline"
3640
          elif instance.primary_node in bad_nodes:
3641
            val = "ERROR_nodedown"
3642
          else:
3643
            running = bool(live_data.get(instance.name))
3644
            if running:
3645
              if instance.admin_up:
3646
                val = "running"
3647
              else:
3648
                val = "ERROR_up"
3649
            else:
3650
              if instance.admin_up:
3651
                val = "ERROR_down"
3652
              else:
3653
                val = "ADMIN_down"
3654
        elif field == "oper_ram":
3655
          if instance.primary_node in bad_nodes:
3656
            val = None
3657
          elif instance.name in live_data:
3658
            val = live_data[instance.name].get("memory", "?")
3659
          else:
3660
            val = "-"
3661
        elif field == "vcpus":
3662
          val = i_be[constants.BE_VCPUS]
3663
        elif field == "disk_template":
3664
          val = instance.disk_template
3665
        elif field == "ip":
3666
          if instance.nics:
3667
            val = instance.nics[0].ip
3668
          else:
3669
            val = None
3670
        elif field == "nic_mode":
3671
          if instance.nics:
3672
            val = i_nicp[0][constants.NIC_MODE]
3673
          else:
3674
            val = None
3675
        elif field == "nic_link":
3676
          if instance.nics:
3677
            val = i_nicp[0][constants.NIC_LINK]
3678
          else:
3679
            val = None
3680
        elif field == "bridge":
3681
          if (instance.nics and
3682
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
3683
            val = i_nicp[0][constants.NIC_LINK]
3684
          else:
3685
            val = None
3686
        elif field == "mac":
3687
          if instance.nics:
3688
            val = instance.nics[0].mac
3689
          else:
3690
            val = None
3691
        elif field == "sda_size" or field == "sdb_size":
3692
          idx = ord(field[2]) - ord('a')
3693
          try:
3694
            val = instance.FindDisk(idx).size
3695
          except errors.OpPrereqError:
3696
            val = None
3697
        elif field == "disk_usage": # total disk usage per node
3698
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
3699
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3700
        elif field == "tags":
3701
          val = list(instance.GetTags())
3702
        elif field == "serial_no":
3703
          val = instance.serial_no
3704
        elif field == "network_port":
3705
          val = instance.network_port
3706
        elif field == "hypervisor":
3707
          val = instance.hypervisor
3708
        elif field == "hvparams":
3709
          val = i_hv
3710
        elif (field.startswith(HVPREFIX) and
3711
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3712
          val = i_hv.get(field[len(HVPREFIX):], None)
3713
        elif field == "beparams":
3714
          val = i_be
3715
        elif (field.startswith(BEPREFIX) and
3716
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3717
          val = i_be.get(field[len(BEPREFIX):], None)
3718
        elif st_match and st_match.groups():
3719
          # matches a variable list
3720
          st_groups = st_match.groups()
3721
          if st_groups and st_groups[0] == "disk":
3722
            if st_groups[1] == "count":
3723
              val = len(instance.disks)
3724
            elif st_groups[1] == "sizes":
3725
              val = [disk.size for disk in instance.disks]
3726
            elif st_groups[1] == "size":
3727
              try:
3728
                val = instance.FindDisk(st_groups[2]).size
3729
              except errors.OpPrereqError:
3730
                val = None
3731
            else:
3732
              assert False, "Unhandled disk parameter"
3733
          elif st_groups[0] == "nic":
3734
            if st_groups[1] == "count":
3735
              val = len(instance.nics)
3736
            elif st_groups[1] == "macs":
3737
              val = [nic.mac for nic in instance.nics]
3738
            elif st_groups[1] == "ips":
3739
              val = [nic.ip for nic in instance.nics]
3740
            elif st_groups[1] == "modes":
3741
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
3742
            elif st_groups[1] == "links":
3743
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
3744
            elif st_groups[1] == "bridges":
3745
              val = []
3746
              for nicp in i_nicp:
3747
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3748
                  val.append(nicp[constants.NIC_LINK])
3749
                else:
3750
                  val.append(None)
3751
            else:
3752
              # index-based item
3753
              nic_idx = int(st_groups[2])
3754
              if nic_idx >= len(instance.nics):
3755
                val = None
3756
              else:
3757
                if st_groups[1] == "mac":
3758
                  val = instance.nics[nic_idx].mac
3759
                elif st_groups[1] == "ip":
3760
                  val = instance.nics[nic_idx].ip
3761
                elif st_groups[1] == "mode":
3762
                  val = i_nicp[nic_idx][constants.NIC_MODE]
3763
                elif st_groups[1] == "link":
3764
                  val = i_nicp[nic_idx][constants.NIC_LINK]
3765
                elif st_groups[1] == "bridge":
3766
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
3767
                  if nic_mode == constants.NIC_MODE_BRIDGED:
3768
                    val = i_nicp[nic_idx][constants.NIC_LINK]
3769
                  else:
3770
                    val = None
3771
                else:
3772
                  assert False, "Unhandled NIC parameter"
3773
          else:
3774
            assert False, ("Declared but unhandled variable parameter '%s'" %
3775
                           field)
3776
        else:
3777
          assert False, "Declared but unhandled parameter '%s'" % field
3778
        iout.append(val)
3779
      output.append(iout)
3780

    
3781
    return output
3782

    
3783

    
3784
class LUFailoverInstance(LogicalUnit):
3785
  """Failover an instance.
3786

3787
  """
3788
  HPATH = "instance-failover"
3789
  HTYPE = constants.HTYPE_INSTANCE
3790
  _OP_REQP = ["instance_name", "ignore_consistency"]
3791
  REQ_BGL = False
3792

    
3793
  def ExpandNames(self):
3794
    self._ExpandAndLockInstance()
3795
    self.needed_locks[locking.LEVEL_NODE] = []
3796
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3797

    
3798
  def DeclareLocks(self, level):
3799
    if level == locking.LEVEL_NODE:
3800
      self._LockInstancesNodes()
3801

    
3802
  def BuildHooksEnv(self):
3803
    """Build hooks env.
3804

3805
    This runs on master, primary and secondary nodes of the instance.
3806

3807
    """
3808
    env = {
3809
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3810
      }
3811
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3812
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3813
    return env, nl, nl
3814

    
3815
  def CheckPrereq(self):
3816
    """Check prerequisites.
3817

3818
    This checks that the instance is in the cluster.
3819

3820
    """
3821
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3822
    assert self.instance is not None, \
3823
      "Cannot retrieve locked instance %s" % self.op.instance_name
3824

    
3825
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3826
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3827
      raise errors.OpPrereqError("Instance's disk layout is not"
3828
                                 " network mirrored, cannot failover.")
3829

    
3830
    secondary_nodes = instance.secondary_nodes
3831
    if not secondary_nodes:
3832
      raise errors.ProgrammerError("no secondary node but using "
3833
                                   "a mirrored disk template")
3834

    
3835
    target_node = secondary_nodes[0]
3836
    _CheckNodeOnline(self, target_node)
3837
    _CheckNodeNotDrained(self, target_node)
3838
    if instance.admin_up:
3839
      # check memory requirements on the secondary node
3840
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
3841
                           instance.name, bep[constants.BE_MEMORY],
3842
                           instance.hypervisor)
3843
    else:
3844
      self.LogInfo("Not checking memory on the secondary node as"
3845
                   " instance will not be started")
3846

    
3847
    # check bridge existence
3848
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3849

    
3850
  def Exec(self, feedback_fn):
3851
    """Failover an instance.
3852

3853
    The failover is done by shutting it down on its present node and
3854
    starting it on the secondary.
3855

3856
    """
3857
    instance = self.instance
3858

    
3859
    source_node = instance.primary_node
3860
    target_node = instance.secondary_nodes[0]
3861

    
3862
    feedback_fn("* checking disk consistency between source and target")
3863
    for dev in instance.disks:
3864
      # for drbd, these are drbd over lvm
3865
      if not _CheckDiskConsistency(self, dev, target_node, False):
3866
        if instance.admin_up and not self.op.ignore_consistency:
3867
          raise errors.OpExecError("Disk %s is degraded on target node,"
3868
                                   " aborting failover." % dev.iv_name)
3869

    
3870
    feedback_fn("* shutting down instance on source node")
3871
    logging.info("Shutting down instance %s on node %s",
3872
                 instance.name, source_node)
3873

    
3874
    result = self.rpc.call_instance_shutdown(source_node, instance)
3875
    msg = result.fail_msg
3876
    if msg:
3877
      if self.op.ignore_consistency:
3878
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
3879
                             " Proceeding anyway. Please make sure node"
3880
                             " %s is down. Error details: %s",
3881
                             instance.name, source_node, source_node, msg)
3882
      else:
3883
        raise errors.OpExecError("Could not shutdown instance %s on"
3884
                                 " node %s: %s" %
3885
                                 (instance.name, source_node, msg))
3886

    
3887
    feedback_fn("* deactivating the instance's disks on source node")
3888
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
3889
      raise errors.OpExecError("Can't shut down the instance's disks.")
3890

    
3891
    instance.primary_node = target_node
3892
    # distribute new instance config to the other nodes
3893
    self.cfg.Update(instance)
3894

    
3895
    # Only start the instance if it's marked as up
3896
    if instance.admin_up:
3897
      feedback_fn("* activating the instance's disks on target node")
3898
      logging.info("Starting instance %s on node %s",
3899
                   instance.name, target_node)
3900

    
3901
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
3902
                                               ignore_secondaries=True)
3903
      if not disks_ok:
3904
        _ShutdownInstanceDisks(self, instance)
3905
        raise errors.OpExecError("Can't activate the instance's disks")
3906

    
3907
      feedback_fn("* starting the instance on the target node")
3908
      result = self.rpc.call_instance_start(target_node, instance, None, None)
3909
      msg = result.fail_msg
3910
      if msg:
3911
        _ShutdownInstanceDisks(self, instance)
3912
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
3913
                                 (instance.name, target_node, msg))
3914

    
3915

    
3916
class LUMigrateInstance(LogicalUnit):
3917
  """Migrate an instance.
3918

3919
  This is migration without shutting down, compared to the failover,
3920
  which is done with shutdown.
3921

3922
  """
3923
  HPATH = "instance-migrate"
3924
  HTYPE = constants.HTYPE_INSTANCE
3925
  _OP_REQP = ["instance_name", "live", "cleanup"]
3926

    
3927
  REQ_BGL = False
3928

    
3929
  def ExpandNames(self):
3930
    self._ExpandAndLockInstance()
3931

    
3932
    self.needed_locks[locking.LEVEL_NODE] = []
3933
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3934

    
3935
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
3936
                                       self.op.live, self.op.cleanup)
3937
    self.tasklets = [self._migrater]
3938

    
3939
  def DeclareLocks(self, level):
3940
    if level == locking.LEVEL_NODE:
3941
      self._LockInstancesNodes()
3942

    
3943
  def BuildHooksEnv(self):
3944
    """Build hooks env.
3945

3946
    This runs on master, primary and secondary nodes of the instance.
3947

3948
    """
3949
    instance = self._migrater.instance
3950
    env = _BuildInstanceHookEnvByObject(self, instance)
3951
    env["MIGRATE_LIVE"] = self.op.live
3952
    env["MIGRATE_CLEANUP"] = self.op.cleanup
3953
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
3954
    return env, nl, nl
3955

    
3956

    
3957
class LUMigrateNode(LogicalUnit):
3958
  """Migrate all instances from a node.
3959

3960
  """
3961
  HPATH = "node-migrate"
3962
  HTYPE = constants.HTYPE_NODE
3963
  _OP_REQP = ["node_name", "live"]
3964
  REQ_BGL = False
3965

    
3966
  def ExpandNames(self):
3967
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
3968
    if self.op.node_name is None:
3969
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
3970

    
3971
    self.needed_locks = {
3972
      locking.LEVEL_NODE: [self.op.node_name],
3973
      }
3974

    
3975
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3976

    
3977
    # Create tasklets for migrating instances for all instances on this node
3978
    names = []
3979
    tasklets = []
3980

    
3981
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
3982
      logging.debug("Migrating instance %s", inst.name)
3983
      names.append(inst.name)
3984

    
3985
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
3986

    
3987
    self.tasklets = tasklets
3988

    
3989
    # Declare instance locks
3990
    self.needed_locks[locking.LEVEL_INSTANCE] = names
3991

    
3992
  def DeclareLocks(self, level):
3993
    if level == locking.LEVEL_NODE:
3994
      self._LockInstancesNodes()
3995

    
3996
  def BuildHooksEnv(self):
3997
    """Build hooks env.
3998

3999
    This runs on the master, the primary and all the secondaries.
4000

4001
    """
4002
    env = {
4003
      "NODE_NAME": self.op.node_name,
4004
      }
4005

    
4006
    nl = [self.cfg.GetMasterNode()]
4007

    
4008
    return (env, nl, nl)
4009

    
4010

    
4011
class TLMigrateInstance(Tasklet):
4012
  def __init__(self, lu, instance_name, live, cleanup):
4013
    """Initializes this class.
4014

4015
    """
4016
    Tasklet.__init__(self, lu)
4017

    
4018
    # Parameters
4019
    self.instance_name = instance_name
4020
    self.live = live
4021
    self.cleanup = cleanup
4022

    
4023
  def CheckPrereq(self):
4024
    """Check prerequisites.
4025

4026
    This checks that the instance is in the cluster.
4027

4028
    """
4029
    instance = self.cfg.GetInstanceInfo(
4030
      self.cfg.ExpandInstanceName(self.instance_name))
4031
    if instance is None:
4032
      raise errors.OpPrereqError("Instance '%s' not known" %
4033
                                 self.instance_name)
4034

    
4035
    if instance.disk_template != constants.DT_DRBD8:
4036
      raise errors.OpPrereqError("Instance's disk layout is not"
4037
                                 " drbd8, cannot migrate.")
4038

    
4039
    secondary_nodes = instance.secondary_nodes
4040
    if not secondary_nodes:
4041
      raise errors.ConfigurationError("No secondary node but using"
4042
                                      " drbd8 disk template")
4043

    
4044
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
4045

    
4046
    target_node = secondary_nodes[0]
4047
    # check memory requirements on the secondary node
4048
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4049
                         instance.name, i_be[constants.BE_MEMORY],
4050
                         instance.hypervisor)
4051

    
4052
    # check bridge existence
4053
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4054

    
4055
    if not self.cleanup:
4056
      _CheckNodeNotDrained(self, target_node)
4057
      result = self.rpc.call_instance_migratable(instance.primary_node,
4058
                                                 instance)
4059
      result.Raise("Can't migrate, please use failover", prereq=True)
4060

    
4061
    self.instance = instance
4062

    
4063
  def _WaitUntilSync(self):
4064
    """Poll with custom rpc for disk sync.
4065

4066
    This uses our own step-based rpc call.
4067

4068
    """
4069
    self.feedback_fn("* wait until resync is done")
4070
    all_done = False
4071
    while not all_done:
4072
      all_done = True
4073
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4074
                                            self.nodes_ip,
4075
                                            self.instance.disks)
4076
      min_percent = 100
4077
      for node, nres in result.items():
4078
        nres.Raise("Cannot resync disks on node %s" % node)
4079
        node_done, node_percent = nres.payload
4080
        all_done = all_done and node_done
4081
        if node_percent is not None:
4082
          min_percent = min(min_percent, node_percent)
4083
      if not all_done:
4084
        if min_percent < 100:
4085
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
4086
        time.sleep(2)
4087

    
4088
  def _EnsureSecondary(self, node):
4089
    """Demote a node to secondary.
4090

4091
    """
4092
    self.feedback_fn("* switching node %s to secondary mode" % node)
4093

    
4094
    for dev in self.instance.disks:
4095
      self.cfg.SetDiskID(dev, node)
4096

    
4097
    result = self.rpc.call_blockdev_close(node, self.instance.name,
4098
                                          self.instance.disks)
4099
    result.Raise("Cannot change disk to secondary on node %s" % node)
4100

    
4101
  def _GoStandalone(self):
4102
    """Disconnect from the network.
4103

4104
    """
4105
    self.feedback_fn("* changing into standalone mode")
4106
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4107
                                               self.instance.disks)
4108
    for node, nres in result.items():
4109
      nres.Raise("Cannot disconnect disks node %s" % node)
4110

    
4111
  def _GoReconnect(self, multimaster):
4112
    """Reconnect to the network.
4113

4114
    """
4115
    if multimaster:
4116
      msg = "dual-master"
4117
    else:
4118
      msg = "single-master"
4119
    self.feedback_fn("* changing disks into %s mode" % msg)
4120
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4121
                                           self.instance.disks,
4122
                                           self.instance.name, multimaster)
4123
    for node, nres in result.items():
4124
      nres.Raise("Cannot change disks config on node %s" % node)
4125

    
4126
  def _ExecCleanup(self):
4127
    """Try to cleanup after a failed migration.
4128

4129
    The cleanup is done by:
4130
      - check that the instance is running only on one node
4131
        (and update the config if needed)
4132
      - change disks on its secondary node to secondary
4133
      - wait until disks are fully synchronized
4134
      - disconnect from the network
4135
      - change disks into single-master mode
4136
      - wait again until disks are fully synchronized
4137

4138
    """
4139
    instance = self.instance
4140
    target_node = self.target_node
4141
    source_node = self.source_node
4142

    
4143
    # check running on only one node
4144
    self.feedback_fn("* checking where the instance actually runs"
4145
                     " (if this hangs, the hypervisor might be in"
4146
                     " a bad state)")
4147
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4148
    for node, result in ins_l.items():
4149
      result.Raise("Can't contact node %s" % node)
4150

    
4151
    runningon_source = instance.name in ins_l[source_node].payload
4152
    runningon_target = instance.name in ins_l[target_node].payload
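    # after a failed migration exactly one of these should be true; both or
    # neither means the hypervisor state is inconsistent and we give up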
4153

    
4154
    if runningon_source and runningon_target:
4155
      raise errors.OpExecError("Instance seems to be running on two nodes,"
4156
                               " or the hypervisor is confused. You will have"
4157
                               " to ensure manually that it runs only on one"
4158
                               " and restart this operation.")
4159

    
4160
    if not (runningon_source or runningon_target):
4161
      raise errors.OpExecError("Instance does not seem to be running at all."
4162
                               " In this case, it's safer to repair by"
4163
                               " running 'gnt-instance stop' to ensure disk"
4164
                               " shutdown, and then restarting it.")
4165

    
4166
    if runningon_target:
4167
      # the migration has actually succeeded, we need to update the config
4168
      self.feedback_fn("* instance running on secondary node (%s),"
4169
                       " updating config" % target_node)
4170
      instance.primary_node = target_node
4171
      self.cfg.Update(instance)
4172
      demoted_node = source_node
4173
    else:
4174
      self.feedback_fn("* instance confirmed to be running on its"
4175
                       " primary node (%s)" % source_node)
4176
      demoted_node = target_node
4177

    
4178
    self._EnsureSecondary(demoted_node)
4179
    try:
4180
      self._WaitUntilSync()
4181
    except errors.OpExecError:
4182
      # we ignore here errors, since if the device is standalone, it
4183
      # won't be able to sync
4184
      pass
4185
    self._GoStandalone()
4186
    self._GoReconnect(False)
4187
    self._WaitUntilSync()
4188

    
4189
    self.feedback_fn("* done")
4190

    
4191
  def _RevertDiskStatus(self):
4192
    """Try to revert the disk status after a failed migration.
4193

4194
    """
4195
    target_node = self.target_node
4196
    try:
4197
      self._EnsureSecondary(target_node)
4198
      self._GoStandalone()
4199
      self._GoReconnect(False)
4200
      self._WaitUntilSync()
4201
    except errors.OpExecError, err:
4202
      self.lu.LogWarning("Migration failed and I can't reconnect the"
4203
                         " drives: error '%s'\n"
4204
                         "Please look and recover the instance status" %
4205
                         str(err))
4206

    
4207
  def _AbortMigration(self):
4208
    """Call the hypervisor code to abort a started migration.
4209

4210
    """
4211
    instance = self.instance
4212
    target_node = self.target_node
4213
    migration_info = self.migration_info
4214

    
4215
    abort_result = self.rpc.call_finalize_migration(target_node,
4216
                                                    instance,
4217
                                                    migration_info,
4218
                                                    False)
4219
    abort_msg = abort_result.fail_msg
4220
    if abort_msg:
4221
      logging.error("Aborting migration failed on target node %s: %s" %
4222
                    (target_node, abort_msg))
4223
      # Don't raise an exception here, as we still have to try to revert the
4224
      # disk status, even if this step failed.
4225

    
4226
  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() returns True
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate a unique logical volume name for each of the
  given suffixes.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


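# For illustration only: _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns names of the form "<unique-id>.disk0" and "<unique-id>.disk1",
# each based on its own freshly generated ID; the DRBD8 code path further
# down appends "_data"/"_meta" to such prefixes.
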
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


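# Illustrative sketch (values made up): the disk tree built above for one
# 10240 MB disk looks roughly like
#
#   drbd8 (size=10240, logical_id=(primary, secondary, port, p_minor,
#                                  s_minor, shared_secret))
#     +- LV <name>_data (size=10240)
#     +- LV <name>_meta (size=128)
#
# i.e. a DRBD8 device whose children are the data LV of the requested size
# and a fixed 128 MB metadata LV; port, minors and secret come from the
# cluster configuration.
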
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


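# Illustrative sketch (node and instance names made up): disk_info is a list
# of plain dicts as built by LUCreateInstance, e.g.
#
#   disk_info = [{"size": 10240, "mode": constants.DISK_RDWR}]
#   disks = _GenerateDiskTemplate(lu, constants.DT_DRBD8, "inst1.example.com",
#                                 pnode, [snode], disk_info,
#                                 file_storage_dir, file_driver, 0)
#
# which for DT_DRBD8 yields one _GenerateDRBD8Branch() tree per disk, with
# iv_names "disk/0", "disk/1", ... offset by base_index.
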
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create

  """
  info = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    msg = result.fail_msg
    if msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, msg)
      all_result = False

  return all_result


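# Usage sketch: LUCreateInstance pairs the two helpers above, creating the
# disks and falling back to _RemoveDisks() if creation fails part-way,
# roughly:
#
#   try:
#     _CreateDisks(self, iobj)
#   except errors.OpExecError:
#     _RemoveDisks(self, iobj)
#     raise
#
# Its Exec method additionally releases the reserved DRBD minors; see
# LUCreateInstance.Exec below.
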
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


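# Worked example (illustrative sizes): for two disks of 10240 MB and 2048 MB,
#
#   _ComputeDiskSize(constants.DT_PLAIN, disks) == 10240 + 2048 == 12288
#   _ComputeDiskSize(constants.DT_DRBD8, disks) == (10240 + 128) + (2048 + 128)
#                                               == 12544
#
# while DT_DISKLESS and DT_FILE return None, i.e. no volume group space is
# required.
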
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


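# Usage note: both instance creation and instance modification call this
# helper with the instance's node list and the filled hypervisor parameters,
# e.g. in LUCreateInstance.CheckPrereq:
#
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
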
class LUCreateInstance(LogicalUnit):
4643
  """Create an instance.
4644

4645
  """
4646
  HPATH = "instance-add"
4647
  HTYPE = constants.HTYPE_INSTANCE
4648
  _OP_REQP = ["instance_name", "disks", "disk_template",
4649
              "mode", "start",
4650
              "wait_for_sync", "ip_check", "nics",
4651
              "hvparams", "beparams"]
4652
  REQ_BGL = False
4653

    
4654
  def _ExpandNode(self, node):
4655
    """Expands and checks one node name.
4656

4657
    """
4658
    node_full = self.cfg.ExpandNodeName(node)
4659
    if node_full is None:
4660
      raise errors.OpPrereqError("Unknown node %s" % node)
4661
    return node_full
4662

    
4663
  def ExpandNames(self):
4664
    """ExpandNames for CreateInstance.
4665

4666
    Figure out the right locks for instance creation.
4667

4668
    """
4669
    self.needed_locks = {}
4670

    
4671
    # set optional parameters to none if they don't exist
4672
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4673
      if not hasattr(self.op, attr):
4674
        setattr(self.op, attr, None)
4675

    
4676
    # cheap checks, mostly valid constants given
4677

    
4678
    # verify creation mode
4679
    if self.op.mode not in (constants.INSTANCE_CREATE,
4680
                            constants.INSTANCE_IMPORT):
4681
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4682
                                 self.op.mode)
4683

    
4684
    # disk template and mirror node verification
4685
    if self.op.disk_template not in constants.DISK_TEMPLATES:
4686
      raise errors.OpPrereqError("Invalid disk template name")
4687

    
4688
    if self.op.hypervisor is None:
4689
      self.op.hypervisor = self.cfg.GetHypervisorType()
4690

    
4691
    cluster = self.cfg.GetClusterInfo()
4692
    enabled_hvs = cluster.enabled_hypervisors
4693
    if self.op.hypervisor not in enabled_hvs:
4694
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4695
                                 " cluster (%s)" % (self.op.hypervisor,
4696
                                  ",".join(enabled_hvs)))
4697

    
4698
    # check hypervisor parameter syntax (locally)
4699
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4700
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
4701
                                  self.op.hvparams)
4702
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4703
    hv_type.CheckParameterSyntax(filled_hvp)
4704
    self.hv_full = filled_hvp
4705

    
4706
    # fill and remember the beparams dict
4707
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4708
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
4709
                                    self.op.beparams)
4710

    
4711
    #### instance parameters check
4712

    
4713
    # instance name verification
4714
    hostname1 = utils.HostInfo(self.op.instance_name)
4715
    self.op.instance_name = instance_name = hostname1.name
4716

    
4717
    # this is just a preventive check, but someone might still add this
4718
    # instance in the meantime, and creation will fail at lock-add time
4719
    if instance_name in self.cfg.GetInstanceList():
4720
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4721
                                 instance_name)
4722

    
4723
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4724

    
4725
    # NIC buildup
4726
    self.nics = []
4727
    for idx, nic in enumerate(self.op.nics):
4728
      nic_mode_req = nic.get("mode", None)
4729
      nic_mode = nic_mode_req
4730
      if nic_mode is None:
4731
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
4732

    
4733
      # in routed mode, for the first nic, the default ip is 'auto'
4734
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
4735
        default_ip_mode = constants.VALUE_AUTO
4736
      else:
4737
        default_ip_mode = constants.VALUE_NONE
4738

    
4739
      # ip validity checks
4740
      ip = nic.get("ip", default_ip_mode)
4741
      if ip is None or ip.lower() == constants.VALUE_NONE:
4742
        nic_ip = None
4743
      elif ip.lower() == constants.VALUE_AUTO:
4744
        nic_ip = hostname1.ip
4745
      else:
4746
        if not utils.IsValidIP(ip):
4747
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4748
                                     " like a valid IP" % ip)
4749
        nic_ip = ip
4750

    
4751
      # TODO: check the ip for uniqueness !!
4752
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
4753
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
4754

    
4755
      # MAC address verification
4756
      mac = nic.get("mac", constants.VALUE_AUTO)
4757
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4758
        if not utils.IsValidMac(mac.lower()):
4759
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4760
                                     mac)
4761
      # bridge verification
4762
      bridge = nic.get("bridge", None)
4763
      link = nic.get("link", None)
4764
      if bridge and link:
4765
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
4766
                                   " at the same time")
4767
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
4768
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
4769
      elif bridge:
4770
        link = bridge
4771

    
4772
      nicparams = {}
4773
      if nic_mode_req:
4774
        nicparams[constants.NIC_MODE] = nic_mode_req
4775
      if link:
4776
        nicparams[constants.NIC_LINK] = link
4777

    
4778
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4779
                                      nicparams)
4780
      objects.NIC.CheckParameterSyntax(check_params)
4781
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
4782

    
4783
    # disk checks/pre-build
4784
    self.disks = []
4785
    for disk in self.op.disks:
4786
      mode = disk.get("mode", constants.DISK_RDWR)
4787
      if mode not in constants.DISK_ACCESS_SET:
4788
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4789
                                   mode)
4790
      size = disk.get("size", None)
4791
      if size is None:
4792
        raise errors.OpPrereqError("Missing disk size")
4793
      try:
4794
        size = int(size)
4795
      except ValueError:
4796
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4797
      self.disks.append({"size": size, "mode": mode})
4798

    
4799
    # used in CheckPrereq for ip ping check
4800
    self.check_ip = hostname1.ip
4801

    
4802
    # file storage checks
4803
    if (self.op.file_driver and
4804
        not self.op.file_driver in constants.FILE_DRIVER):
4805
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
4806
                                 self.op.file_driver)
4807

    
4808
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4809
      raise errors.OpPrereqError("File storage directory path not absolute")
4810

    
4811
    ### Node/iallocator related checks
4812
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
4813
      raise errors.OpPrereqError("One and only one of iallocator and primary"
4814
                                 " node must be given")
4815

    
4816
    if self.op.iallocator:
4817
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4818
    else:
4819
      self.op.pnode = self._ExpandNode(self.op.pnode)
4820
      nodelist = [self.op.pnode]
4821
      if self.op.snode is not None:
4822
        self.op.snode = self._ExpandNode(self.op.snode)
4823
        nodelist.append(self.op.snode)
4824
      self.needed_locks[locking.LEVEL_NODE] = nodelist
4825

    
4826
    # in case of import lock the source node too
4827
    if self.op.mode == constants.INSTANCE_IMPORT:
4828
      src_node = getattr(self.op, "src_node", None)
4829
      src_path = getattr(self.op, "src_path", None)
4830

    
4831
      if src_path is None:
4832
        self.op.src_path = src_path = self.op.instance_name
4833

    
4834
      if src_node is None:
4835
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4836
        self.op.src_node = None
4837
        if os.path.isabs(src_path):
4838
          raise errors.OpPrereqError("Importing an instance from an absolute"
4839
                                     " path requires a source node option.")
4840
      else:
4841
        self.op.src_node = src_node = self._ExpandNode(src_node)
4842
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4843
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
4844
        if not os.path.isabs(src_path):
4845
          self.op.src_path = src_path = \
4846
            os.path.join(constants.EXPORT_DIR, src_path)
4847

    
4848
    else: # INSTANCE_CREATE
4849
      if getattr(self.op, "os_type", None) is None:
4850
        raise errors.OpPrereqError("No guest OS specified")
4851

    
4852
  def _RunAllocator(self):
4853
    """Run the allocator based on input opcode.
4854

4855
    """
4856
    nics = [n.ToDict() for n in self.nics]
4857
    ial = IAllocator(self.cfg, self.rpc,
4858
                     mode=constants.IALLOCATOR_MODE_ALLOC,
4859
                     name=self.op.instance_name,
4860
                     disk_template=self.op.disk_template,
4861
                     tags=[],
4862
                     os=self.op.os_type,
4863
                     vcpus=self.be_full[constants.BE_VCPUS],
4864
                     mem_size=self.be_full[constants.BE_MEMORY],
4865
                     disks=self.disks,
4866
                     nics=nics,
4867
                     hypervisor=self.op.hypervisor,
4868
                     )
4869

    
4870
    ial.Run(self.op.iallocator)
4871

    
4872
    if not ial.success:
4873
      raise errors.OpPrereqError("Can't compute nodes using"
4874
                                 " iallocator '%s': %s" % (self.op.iallocator,
4875
                                                           ial.info))
4876
    if len(ial.nodes) != ial.required_nodes:
4877
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4878
                                 " of nodes (%s), required %s" %
4879
                                 (self.op.iallocator, len(ial.nodes),
4880
                                  ial.required_nodes))
4881
    self.op.pnode = ial.nodes[0]
4882
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4883
                 self.op.instance_name, self.op.iallocator,
4884
                 ", ".join(ial.nodes))
4885
    if ial.required_nodes == 2:
4886
      self.op.snode = ial.nodes[1]
4887

    
4888
  def BuildHooksEnv(self):
4889
    """Build hooks env.
4890

4891
    This runs on master, primary and secondary nodes of the instance.
4892

4893
    """
4894
    env = {
4895
      "ADD_MODE": self.op.mode,
4896
      }
4897
    if self.op.mode == constants.INSTANCE_IMPORT:
4898
      env["SRC_NODE"] = self.op.src_node
4899
      env["SRC_PATH"] = self.op.src_path
4900
      env["SRC_IMAGES"] = self.src_images
4901

    
4902
    env.update(_BuildInstanceHookEnv(
4903
      name=self.op.instance_name,
4904
      primary_node=self.op.pnode,
4905
      secondary_nodes=self.secondaries,
4906
      status=self.op.start,
4907
      os_type=self.op.os_type,
4908
      memory=self.be_full[constants.BE_MEMORY],
4909
      vcpus=self.be_full[constants.BE_VCPUS],
4910
      nics=_NICListToTuple(self, self.nics),
4911
      disk_template=self.op.disk_template,
4912
      disks=[(d["size"], d["mode"]) for d in self.disks],
4913
      bep=self.be_full,
4914
      hvp=self.hv_full,
4915
      hypervisor_name=self.op.hypervisor,
4916
    ))
4917

    
4918
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
4919
          self.secondaries)
4920
    return env, nl, nl
4921

    
4922

    
4923
  def CheckPrereq(self):
4924
    """Check prerequisites.
4925

4926
    """
4927
    if (not self.cfg.GetVGName() and
4928
        self.op.disk_template not in constants.DTS_NOT_LVM):
4929
      raise errors.OpPrereqError("Cluster does not support lvm-based"
4930
                                 " instances")
4931

    
4932
    if self.op.mode == constants.INSTANCE_IMPORT:
4933
      src_node = self.op.src_node
4934
      src_path = self.op.src_path
4935

    
4936
      if src_node is None:
4937
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
4938
        exp_list = self.rpc.call_export_list(locked_nodes)
4939
        found = False
4940
        for node in exp_list:
4941
          if exp_list[node].fail_msg:
4942
            continue
4943
          if src_path in exp_list[node].payload:
4944
            found = True
4945
            self.op.src_node = src_node = node
4946
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
4947
                                                       src_path)
4948
            break
4949
        if not found:
4950
          raise errors.OpPrereqError("No export found for relative path %s" %
4951
                                      src_path)
4952

    
4953
      _CheckNodeOnline(self, src_node)
4954
      result = self.rpc.call_export_info(src_node, src_path)
4955
      result.Raise("No export or invalid export found in dir %s" % src_path)
4956

    
4957
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
4958
      if not export_info.has_section(constants.INISECT_EXP):
4959
        raise errors.ProgrammerError("Corrupted export config")
4960

    
4961
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
4962
      if (int(ei_version) != constants.EXPORT_VERSION):
4963
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
4964
                                   (ei_version, constants.EXPORT_VERSION))
4965

    
4966
      # Check that the new instance doesn't have less disks than the export
4967
      instance_disks = len(self.disks)
4968
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
4969
      if instance_disks < export_disks:
4970
        raise errors.OpPrereqError("Not enough disks to import."
4971
                                   " (instance: %d, export: %d)" %
4972
                                   (instance_disks, export_disks))
4973

    
4974
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
4975
      disk_images = []
4976
      for idx in range(export_disks):
4977
        option = 'disk%d_dump' % idx
4978
        if export_info.has_option(constants.INISECT_INS, option):
4979
          # FIXME: are the old os-es, disk sizes, etc. useful?
4980
          export_name = export_info.get(constants.INISECT_INS, option)
4981
          image = os.path.join(src_path, export_name)
4982
          disk_images.append(image)
4983
        else:
4984
          disk_images.append(False)
4985

    
4986
      self.src_images = disk_images
4987

    
4988
      old_name = export_info.get(constants.INISECT_INS, 'name')
4989
      # FIXME: int() here could throw a ValueError on broken exports
4990
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
4991
      if self.op.instance_name == old_name:
4992
        for idx, nic in enumerate(self.nics):
4993
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
4994
            nic_mac_ini = 'nic%d_mac' % idx
4995
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
4996

    
4997
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
4998
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
4999
    if self.op.start and not self.op.ip_check:
5000
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5001
                                 " adding an instance in start mode")
5002

    
5003
    if self.op.ip_check:
5004
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5005
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5006
                                   (self.check_ip, self.op.instance_name))
5007

    
5008
    #### mac address generation
5009
    # By generating here the mac address both the allocator and the hooks get
5010
    # the real final mac address rather than the 'auto' or 'generate' value.
5011
    # There is a race condition between the generation and the instance object
5012
    # creation, which means that we know the mac is valid now, but we're not
5013
    # sure it will be when we actually add the instance. If things go bad
5014
    # adding the instance will abort because of a duplicate mac, and the
5015
    # creation job will fail.
5016
    for nic in self.nics:
5017
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5018
        nic.mac = self.cfg.GenerateMAC()
5019

    
5020
    #### allocator run
5021

    
5022
    if self.op.iallocator is not None:
5023
      self._RunAllocator()
5024

    
5025
    #### node related checks
5026

    
5027
    # check primary node
5028
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
5029
    assert self.pnode is not None, \
5030
      "Cannot retrieve locked node %s" % self.op.pnode
5031
    if pnode.offline:
5032
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
5033
                                 pnode.name)
5034
    if pnode.drained:
5035
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
5036
                                 pnode.name)
5037

    
5038
    self.secondaries = []
5039

    
5040
    # mirror node verification
5041
    if self.op.disk_template in constants.DTS_NET_MIRROR:
5042
      if self.op.snode is None:
5043
        raise errors.OpPrereqError("The networked disk templates need"
5044
                                   " a mirror node")
5045
      if self.op.snode == pnode.name:
5046
        raise errors.OpPrereqError("The secondary node cannot be"
5047
                                   " the primary node.")
5048
      _CheckNodeOnline(self, self.op.snode)
5049
      _CheckNodeNotDrained(self, self.op.snode)
5050
      self.secondaries.append(self.op.snode)
5051

    
5052
    nodenames = [pnode.name] + self.secondaries
5053

    
5054
    req_size = _ComputeDiskSize(self.op.disk_template,
5055
                                self.disks)
5056

    
5057
    # Check lv size requirements
5058
    if req_size is not None:
5059
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5060
                                         self.op.hypervisor)
5061
      for node in nodenames:
5062
        info = nodeinfo[node]
5063
        info.Raise("Cannot get current information from node %s" % node)
5064
        info = info.payload
5065
        vg_free = info.get('vg_free', None)
5066
        if not isinstance(vg_free, int):
5067
          raise errors.OpPrereqError("Can't compute free disk space on"
5068
                                     " node %s" % node)
5069
        if req_size > vg_free:
5070
          raise errors.OpPrereqError("Not enough disk space on target node %s."
5071
                                     " %d MB available, %d MB required" %
5072
                                     (node, vg_free, req_size))
5073

    
5074
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
5075

    
5076
    # os verification
5077
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
5078
    result.Raise("OS '%s' not in supported os list for primary node %s" %
5079
                 (self.op.os_type, pnode.name), prereq=True)
5080

    
5081
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
5082

    
5083
    # memory check on primary node
5084
    if self.op.start:
5085
      _CheckNodeFreeMemory(self, self.pnode.name,
5086
                           "creating instance %s" % self.op.instance_name,
5087
                           self.be_full[constants.BE_MEMORY],
5088
                           self.op.hypervisor)
5089

    
5090
    self.dry_run_result = list(nodenames)
5091

    
5092
  def Exec(self, feedback_fn):
5093
    """Create and add the instance to the cluster.
5094

5095
    """
5096
    instance = self.op.instance_name
5097
    pnode_name = self.pnode.name
5098

    
5099
    ht_kind = self.op.hypervisor
5100
    if ht_kind in constants.HTS_REQ_PORT:
5101
      network_port = self.cfg.AllocatePort()
5102
    else:
5103
      network_port = None
5104

    
5105
    ##if self.op.vnc_bind_address is None:
5106
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
5107

    
5108
    # this is needed because os.path.join does not accept None arguments
5109
    if self.op.file_storage_dir is None:
5110
      string_file_storage_dir = ""
5111
    else:
5112
      string_file_storage_dir = self.op.file_storage_dir
5113

    
5114
    # build the full file storage dir path
5115
    file_storage_dir = os.path.normpath(os.path.join(
5116
                                        self.cfg.GetFileStorageDir(),
5117
                                        string_file_storage_dir, instance))
5118

    
5119

    
5120
    disks = _GenerateDiskTemplate(self,
5121
                                  self.op.disk_template,
5122
                                  instance, pnode_name,
5123
                                  self.secondaries,
5124
                                  self.disks,
5125
                                  file_storage_dir,
5126
                                  self.op.file_driver,
5127
                                  0)
5128

    
5129
    iobj = objects.Instance(name=instance, os=self.op.os_type,
5130
                            primary_node=pnode_name,
5131
                            nics=self.nics, disks=disks,
5132
                            disk_template=self.op.disk_template,
5133
                            admin_up=False,
5134
                            network_port=network_port,
5135
                            beparams=self.op.beparams,
5136
                            hvparams=self.op.hvparams,
5137
                            hypervisor=self.op.hypervisor,
5138
                            )
5139

    
5140
    feedback_fn("* creating instance disks...")
5141
    try:
5142
      _CreateDisks(self, iobj)
5143
    except errors.OpExecError:
5144
      self.LogWarning("Device creation failed, reverting...")
5145
      try:
5146
        _RemoveDisks(self, iobj)
5147
      finally:
5148
        self.cfg.ReleaseDRBDMinors(instance)
5149
        raise
5150

    
5151
    feedback_fn("adding instance %s to cluster config" % instance)
5152

    
5153
    self.cfg.AddInstance(iobj)
5154
    # Declare that we don't want to remove the instance lock anymore, as we've
5155
    # added the instance to the config
5156
    del self.remove_locks[locking.LEVEL_INSTANCE]
5157
    # Unlock all the nodes
5158
    if self.op.mode == constants.INSTANCE_IMPORT:
5159
      nodes_keep = [self.op.src_node]
5160
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5161
                       if node != self.op.src_node]
5162
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
5163
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5164
    else:
5165
      self.context.glm.release(locking.LEVEL_NODE)
5166
      del self.acquired_locks[locking.LEVEL_NODE]
5167

    
5168
    if self.op.wait_for_sync:
5169
      disk_abort = not _WaitForSync(self, iobj)
5170
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
5171
      # make sure the disks are not degraded (still sync-ing is ok)
5172
      time.sleep(15)
5173
      feedback_fn("* checking mirrors status")
5174
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
5175
    else:
5176
      disk_abort = False
5177

    
5178
    if disk_abort:
5179
      _RemoveDisks(self, iobj)
5180
      self.cfg.RemoveInstance(iobj.name)
5181
      # Make sure the instance lock gets removed
5182
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5183
      raise errors.OpExecError("There are some degraded disks for"
5184
                               " this instance")
5185

    
5186
    feedback_fn("creating os for instance %s on node %s" %
5187
                (instance, pnode_name))
5188

    
5189
    if iobj.disk_template != constants.DT_DISKLESS:
5190
      if self.op.mode == constants.INSTANCE_CREATE:
5191
        feedback_fn("* running the instance OS create scripts...")
5192
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
5193
        result.Raise("Could not add os for instance %s"
5194
                     " on node %s" % (instance, pnode_name))
5195

    
5196
      elif self.op.mode == constants.INSTANCE_IMPORT:
5197
        feedback_fn("* running the instance OS import scripts...")
5198
        src_node = self.op.src_node
5199
        src_images = self.src_images
5200
        cluster_name = self.cfg.GetClusterName()
5201
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5202
                                                         src_node, src_images,
5203
                                                         cluster_name)
5204
        msg = import_result.fail_msg
5205
        if msg:
5206
          self.LogWarning("Error while importing the disk images for instance"
5207
                          " %s on node %s: %s" % (instance, pnode_name, msg))
5208
      else:
5209
        # also checked in the prereq part
5210
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5211
                                     % self.op.mode)
5212

    
5213
    if self.op.start:
5214
      iobj.admin_up = True
5215
      self.cfg.Update(iobj)
5216
      logging.info("Starting instance %s on node %s", instance, pnode_name)
5217
      feedback_fn("* starting instance...")
5218
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5219
      result.Raise("Could not start instance")
5220

    
5221
    return list(iobj.all_nodes)
5222

    
5223

    
5224
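# Illustrative note (link name is a placeholder): the opcode parameters
# consumed by LUCreateInstance use plain dicts for disks and NICs, e.g.
#
#   disks = [{"size": 10240, "mode": constants.DISK_RDWR}]
#   nics = [{"ip": constants.VALUE_AUTO, "mac": constants.VALUE_AUTO,
#            "link": "xen-br0"}]
#
# Unset NIC/backend values fall back to the cluster-level nicparams/beparams
# defaults, as shown in ExpandNames above.
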
class LUConnectConsole(NoHooksLU):
5225
  """Connect to an instance's console.
5226

5227
  This is somewhat special in that it returns the command line that
5228
  you need to run on the master node in order to connect to the
5229
  console.
5230

5231
  """
5232
  _OP_REQP = ["instance_name"]
5233
  REQ_BGL = False
5234

    
5235
  def ExpandNames(self):
5236
    self._ExpandAndLockInstance()
5237

    
5238
  def CheckPrereq(self):
5239
    """Check prerequisites.
5240

5241
    This checks that the instance is in the cluster.
5242

5243
    """
5244
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5245
    assert self.instance is not None, \
5246
      "Cannot retrieve locked instance %s" % self.op.instance_name
5247
    _CheckNodeOnline(self, self.instance.primary_node)
5248

    
5249
  def Exec(self, feedback_fn):
5250
    """Connect to the console of an instance
5251

5252
    """
5253
    instance = self.instance
5254
    node = instance.primary_node
5255

    
5256
    node_insts = self.rpc.call_instance_list([node],
5257
                                             [instance.hypervisor])[node]
5258
    node_insts.Raise("Can't get node information from %s" % node)
5259

    
5260
    if instance.name not in node_insts.payload:
5261
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5262

    
5263
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5264

    
5265
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5266
    cluster = self.cfg.GetClusterInfo()
5267
    # beparams and hvparams are passed separately, to avoid editing the
5268
    # instance and then saving the defaults in the instance itself.
5269
    hvparams = cluster.FillHV(instance)
5270
    beparams = cluster.FillBE(instance)
5271
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5272

    
5273
    # build ssh cmdline
5274
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
5275

    
5276

    
5277
class LUReplaceDisks(LogicalUnit):
5278
  """Replace the disks of an instance.
5279

5280
  """
5281
  HPATH = "mirrors-replace"
5282
  HTYPE = constants.HTYPE_INSTANCE
5283
  _OP_REQP = ["instance_name", "mode", "disks"]
5284
  REQ_BGL = False
5285

    
5286
  def CheckArguments(self):
5287
    if not hasattr(self.op, "remote_node"):
5288
      self.op.remote_node = None
5289
    if not hasattr(self.op, "iallocator"):
5290
      self.op.iallocator = None
5291

    
5292
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
5293
                                  self.op.iallocator)
5294

    
5295
  def ExpandNames(self):
5296
    self._ExpandAndLockInstance()
5297

    
5298
    if self.op.iallocator is not None:
5299
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5300

    
5301
    elif self.op.remote_node is not None:
5302
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5303
      if remote_node is None:
5304
        raise errors.OpPrereqError("Node '%s' not known" %
5305
                                   self.op.remote_node)
5306

    
5307
      self.op.remote_node = remote_node
5308

    
5309
      # Warning: do not remove the locking of the new secondary here
5310
      # unless DRBD8.AddChildren is changed to work in parallel;
5311
      # currently it doesn't since parallel invocations of
5312
      # FindUnusedMinor will conflict
5313
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5314
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5315

    
5316
    else:
5317
      self.needed_locks[locking.LEVEL_NODE] = []
5318
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5319

    
5320
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
5321
                                   self.op.iallocator, self.op.remote_node,
5322
                                   self.op.disks)
5323

    
5324
    self.tasklets = [self.replacer]
5325

    
5326
  def DeclareLocks(self, level):
5327
    # If we're not already locking all nodes in the set we have to declare the
5328
    # instance's primary/secondary nodes.
5329
    if (level == locking.LEVEL_NODE and
5330
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5331
      self._LockInstancesNodes()
5332

    
5333
  def BuildHooksEnv(self):
5334
    """Build hooks env.
5335

5336
    This runs on the master, the primary and all the secondaries.
5337

5338
    """
5339
    instance = self.replacer.instance
5340
    env = {
5341
      "MODE": self.op.mode,
5342
      "NEW_SECONDARY": self.op.remote_node,
5343
      "OLD_SECONDARY": instance.secondary_nodes[0],
5344
      }
5345
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5346
    nl = [
5347
      self.cfg.GetMasterNode(),
5348
      instance.primary_node,
5349
      ]
5350
    if self.op.remote_node is not None:
5351
      nl.append(self.op.remote_node)
5352
    return env, nl, nl
5353

    
5354

    
5355
class LUEvacuateNode(LogicalUnit):
5356
  """Relocate the secondary instances from a node.
5357

5358
  """
5359
  HPATH = "node-evacuate"
5360
  HTYPE = constants.HTYPE_NODE
5361
  _OP_REQP = ["node_name"]
5362
  REQ_BGL = False
5363

    
5364
  def CheckArguments(self):
5365
    if not hasattr(self.op, "remote_node"):
5366
      self.op.remote_node = None
5367
    if not hasattr(self.op, "iallocator"):
5368
      self.op.iallocator = None
5369

    
5370
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
5371
                                  self.op.remote_node,
5372
                                  self.op.iallocator)
5373

    
5374
  def ExpandNames(self):
5375
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
5376
    if self.op.node_name is None:
5377
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
5378

    
5379
    self.needed_locks = {}
5380

    
5381
    # Declare node locks
5382
    if self.op.iallocator is not None:
5383
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5384

    
5385
    elif self.op.remote_node is not None:
5386
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5387
      if remote_node is None:
5388
        raise errors.OpPrereqError("Node '%s' not known" %
5389
                                   self.op.remote_node)
5390

    
5391
      self.op.remote_node = remote_node
5392

    
5393
      # Warning: do not remove the locking of the new secondary here
5394
      # unless DRBD8.AddChildren is changed to work in parallel;
5395
      # currently it doesn't since parallel invocations of
5396
      # FindUnusedMinor will conflict
5397
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5398
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5399

    
5400
    else:
5401
      raise errors.OpPrereqError("Invalid parameters")
5402

    
5403
    # Create tasklets for replacing disks for all secondary instances on this
5404
    # node
5405
    names = []
5406
    tasklets = []
5407

    
5408
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
5409
      logging.debug("Replacing disks for instance %s", inst.name)
5410
      names.append(inst.name)
5411

    
5412
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
5413
                                self.op.iallocator, self.op.remote_node, [])
5414
      tasklets.append(replacer)
5415

    
5416
    self.tasklets = tasklets
5417
    self.instance_names = names
5418

    
5419
    # Declare instance locks
5420
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
5421

    
5422
  def DeclareLocks(self, level):
5423
    # If we're not already locking all nodes in the set we have to declare the
5424
    # instance's primary/secondary nodes.
5425
    if (level == locking.LEVEL_NODE and
5426
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5427
      self._LockInstancesNodes()
5428

    
5429
  def BuildHooksEnv(self):
5430
    """Build hooks env.
5431

5432
    This runs on the master, the primary and all the secondaries.
5433

5434
    """
5435
    env = {
5436
      "NODE_NAME": self.op.node_name,
5437
      }
5438

    
5439
    nl = [self.cfg.GetMasterNode()]
5440

    
5441
    if self.op.remote_node is not None:
5442
      env["NEW_SECONDARY"] = self.op.remote_node
5443
      nl.append(self.op.remote_node)
5444

    
5445
    return (env, nl, nl)
5446

    
5447

    
5448
class TLReplaceDisks(Tasklet):
5449
  """Replaces disks for an instance.
5450

5451
  Note: Locking is not within the scope of this class.
5452

5453
  """
5454
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
5455
               disks):
5456
    """Initializes this class.
5457

5458
    """
5459
    Tasklet.__init__(self, lu)
5460

    
5461
    # Parameters
5462
    self.instance_name = instance_name
5463
    self.mode = mode
5464
    self.iallocator_name = iallocator_name
5465
    self.remote_node = remote_node
5466
    self.disks = disks
5467

    
5468
    # Runtime data
5469
    self.instance = None
5470
    self.new_node = None
5471
    self.target_node = None
5472
    self.other_node = None
5473
    self.remote_node_info = None
5474
    self.node_secondary_ip = None
5475

    
5476
  @staticmethod
5477
  def CheckArguments(mode, remote_node, iallocator):
5478
    """Helper function for users of this class.
5479

5480
    """
5481
    # check for valid parameter combination
5482
    cnt = [remote_node, iallocator].count(None)
5483
    if mode == constants.REPLACE_DISK_CHG:
5484
      if cnt == 2:
5485
        raise errors.OpPrereqError("When changing the secondary either an"
5486
                                   " iallocator script must be used or the"
5487
                                   " new node given")
5488
      elif cnt == 0:
5489
        raise errors.OpPrereqError("Give either the iallocator or the new"
5490
                                   " secondary, not both")
5491
    else: # not replacing the secondary
5492
      if cnt != 2:
5493
        raise errors.OpPrereqError("The iallocator and new node options can"
5494
                                   " be used only when changing the"
5495
                                   " secondary node")
5496
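  # Summary of the valid argument combinations checked above (for reference):
  #
  #   mode == REPLACE_DISK_CHG:  exactly one of remote_node / iallocator
  #   any other mode:            neither remote_node nor iallocator
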

    
5497
  @staticmethod
5498
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
5499
    """Compute a new secondary node using an IAllocator.
5500

5501
    """
5502
    ial = IAllocator(lu.cfg, lu.rpc,
5503
                     mode=constants.IALLOCATOR_MODE_RELOC,
5504
                     name=instance_name,
5505
                     relocate_from=relocate_from)
5506

    
5507
    ial.Run(iallocator_name)
5508

    
5509
    if not ial.success:
5510
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
5511
                                 " %s" % (iallocator_name, ial.info))
5512

    
5513
    if len(ial.nodes) != ial.required_nodes:
5514
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5515
                                 " of nodes (%s), required %s" %
5516
                                 (len(ial.nodes), ial.required_nodes))
5517

    
5518
    remote_node_name = ial.nodes[0]
5519

    
5520
    lu.LogInfo("Selected new secondary for instance '%s': %s",
5521
               instance_name, remote_node_name)
5522

    
5523
    return remote_node_name
5524

    
5525
  def CheckPrereq(self):
5526
    """Check prerequisites.
5527

5528
    This checks that the instance is in the cluster.
5529

5530
    """
5531
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
5532
    assert self.instance is not None, \
5533
      "Cannot retrieve locked instance %s" % self.instance_name
5534

    
5535
    if self.instance.disk_template != constants.DT_DRBD8:
5536
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5537
                                 " instances")
5538

    
5539
    if len(self.instance.secondary_nodes) != 1:
5540
      raise errors.OpPrereqError("The instance has a strange layout,"
5541
                                 " expected one secondary but found %d" %
5542
                                 len(self.instance.secondary_nodes))
5543

    
5544
    secondary_node = self.instance.secondary_nodes[0]
5545

    
5546
    if self.iallocator_name is None:
5547
      remote_node = self.remote_node
5548
    else:
5549
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
5550
                                       self.instance.name, secondary_node)
5551

    
5552
    if remote_node is not None:
5553
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5554
      assert self.remote_node_info is not None, \
5555
        "Cannot retrieve locked node %s" % remote_node
5556
    else:
5557
      self.remote_node_info = None
5558

    
5559
    if remote_node == self.instance.primary_node:
5560
      raise errors.OpPrereqError("The specified node is the primary node of"
5561
                                 " the instance.")
5562

    
5563
    if remote_node == secondary_node:
5564
      raise errors.OpPrereqError("The specified node is already the"
5565
                                 " secondary node of the instance.")
5566

    
5567
    if self.mode == constants.REPLACE_DISK_PRI:
5568
      self.target_node = self.instance.primary_node
5569
      self.other_node = secondary_node
5570
      check_nodes = [self.target_node, self.other_node]
5571

    
5572
    elif self.mode == constants.REPLACE_DISK_SEC:
5573
      self.target_node = secondary_node
5574
      self.other_node = self.instance.primary_node
5575
      check_nodes = [self.target_node, self.other_node]
5576

    
5577
    elif self.mode == constants.REPLACE_DISK_CHG:
5578
      self.new_node = remote_node
5579
      self.other_node = self.instance.primary_node
5580
      self.target_node = secondary_node
5581
      check_nodes = [self.new_node, self.other_node]
5582

    
5583
      _CheckNodeNotDrained(self.lu, remote_node)
5584

    
5585
    else:
5586
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
5587
                                   self.mode)
5588

    
5589
    for node in check_nodes:
5590
      _CheckNodeOnline(self.lu, node)
5591

    
5592
    # If not specified all disks should be replaced
5593
    if not self.disks:
5594
      self.disks = range(len(self.instance.disks))
5595

    
5596
    # Check whether disks are valid
5597
    for disk_idx in self.disks:
5598
      self.instance.FindDisk(disk_idx)
5599

    
5600
    # Get secondary node IP addresses
5601
    node_2nd_ip = {}
5602

    
5603
    for node_name in [self.target_node, self.other_node, self.new_node]:
5604
      if node_name is not None:
5605
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
5606

    
5607
    self.node_secondary_ip = node_2nd_ip
5608

    
5609
  def Exec(self, feedback_fn):
5610
    """Execute disk replacement.
5611

5612
    This dispatches the disk replacement to the appropriate handler.
5613

5614
    """
5615
    feedback_fn("Replacing disks for %s" % self.instance.name)
5616

    
5617
    activate_disks = (not self.instance.admin_up)
5618

    
5619
    # Activate the instance disks if we're replacing them on a down instance
5620
    if activate_disks:
5621
      _StartInstanceDisks(self.lu, self.instance, True)
5622

    
5623
    try:
5624
      if self.mode == constants.REPLACE_DISK_CHG:
5625
        return self._ExecDrbd8Secondary()
5626
      else:
5627
        return self._ExecDrbd8DiskOnly()
5628

    
5629
    finally:
5630
      # Deactivate the instance disks if we're replacing them on a down instance
5631
      if activate_disks:
5632
        _SafeShutdownInstanceDisks(self.lu, self.instance)
5633

    
5634
  def _CheckVolumeGroup(self, nodes):
5635
    self.lu.LogInfo("Checking volume groups")
5636

    
5637
    vgname = self.cfg.GetVGName()
5638

    
5639
    # Make sure volume group exists on all involved nodes
5640
    results = self.rpc.call_vg_list(nodes)
5641
    if not results:
5642
      raise errors.OpExecError("Can't list volume groups on the nodes")
5643

    
5644
    for node in nodes:
5645
      res = results[node]
5646
      res.Raise("Error checking node %s" % node)
5647
      if vgname not in res.payload:
5648
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
5649
                                 (vgname, node))
5650

    
5651
  def _CheckDisksExistence(self, nodes):
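    """Check that the disks being replaced exist on the given nodes."""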
5652
    # Check disk existence
5653
    for idx, dev in enumerate(self.instance.disks):
5654
      if idx not in self.disks:
5655
        continue
5656

    
5657
      for node in nodes:
5658
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
5659
        self.cfg.SetDiskID(dev, node)
5660

    
5661
        result = self.rpc.call_blockdev_find(node, dev)
5662

    
5663
        msg = result.fail_msg
5664
        if msg or not result.payload:
5665
          if not msg:
5666
            msg = "disk not found"
5667
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5668
                                   (idx, node, msg))
5669

    
5670
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
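    """Check that the disks being replaced are consistent on node_name."""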
5671
    for idx, dev in enumerate(self.instance.disks):
5672
      if idx not in self.disks:
5673
        continue
5674

    
5675
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
5676
                      (idx, node_name))
5677

    
5678
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
5679
                                   ldisk=ldisk):
5680
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
5681
                                 " replace disks for instance %s" %
5682
                                 (node_name, self.instance.name))
5683

    
5684
  def _CreateNewStorage(self, node_name):
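    """Create new data/meta LVs on node_name for the disks being replaced."""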
5685
    vgname = self.cfg.GetVGName()
5686
    iv_names = {}
5687

    
5688
    for idx, dev in enumerate(self.instance.disks):
5689
      if idx not in self.disks:
5690
        continue
5691

    
5692
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
5693

    
5694
      self.cfg.SetDiskID(dev, node_name)
5695

    
5696
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
5697
      names = _GenerateUniqueNames(self.lu, lv_names)
5698

    
5699
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
5700
                             logical_id=(vgname, names[0]))
5701
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5702
                             logical_id=(vgname, names[1]))
5703

    
5704
      new_lvs = [lv_data, lv_meta]
5705
      old_lvs = dev.children
5706
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5707

    
5708
      # we pass force_create=True to force the LVM creation
5709
      for new_lv in new_lvs:
5710
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
5711
                        _GetInstanceInfoText(self.instance), False)
5712

    
5713
    return iv_names
5714

    
5715
  def _CheckDevices(self, node_name, iv_names):
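    """Check that the DRBD devices in iv_names exist and are not degraded."""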
5716
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5717
      self.cfg.SetDiskID(dev, node_name)
5718

    
5719
      result = self.rpc.call_blockdev_find(node_name, dev)
5720

    
5721
      msg = result.fail_msg
5722
      if msg or not result.payload:
5723
        if not msg:
5724
          msg = "disk not found"
5725
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5726
                                 (name, msg))
5727

    
5728
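      # payload[5] of the blockdev_find result is the "degraded" flag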
      if result.payload[5]:
5729
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5730

    
5731
  def _RemoveOldStorage(self, node_name, iv_names):
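    """Remove the old (renamed) LVs recorded in iv_names from node_name."""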
5732
    for name, (dev, old_lvs, _) in iv_names.iteritems():
5733
      self.lu.LogInfo("Remove logical volumes for %s" % name)
5734

    
5735
      for lv in old_lvs:
5736
        self.cfg.SetDiskID(lv, node_name)
5737

    
5738
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
5739
        if msg:
5740
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
5741
                             hint="remove unused LVs manually")
5742

    
5743
  def _ExecDrbd8DiskOnly(self):
5744
    """Replace a disk on the primary or secondary for DRBD 8.
5745

5746
    The algorithm for replace is quite complicated:
5747

5748
      1. for each disk to be replaced:
5749

5750
        1. create new LVs on the target node with unique names
5751
        1. detach old LVs from the drbd device
5752
        1. rename old LVs to name_replaced.<time_t>
5753
        1. rename new LVs to old LVs
5754
        1. attach the new LVs (with the old names now) to the drbd device
5755

5756
      1. wait for sync across all devices
5757

5758
      1. for each modified disk:
5759

5760
        1. remove old LVs (which have the name name_replaces.<time_t>)
5761

5762
    Failures are not very well handled.
5763

5764
    """
5765
    steps_total = 6
5766

    
5767
    # Step: check device activation
5768
    self.lu.LogStep(1, steps_total, "Check device existence")
5769
    self._CheckDisksExistence([self.other_node, self.target_node])
5770
    self._CheckVolumeGroup([self.target_node, self.other_node])
5771

    
5772
    # Step: check other node consistency
5773
    self.lu.LogStep(2, steps_total, "Check peer consistency")
5774
    self._CheckDisksConsistency(self.other_node,
5775
                                self.other_node == self.instance.primary_node,
5776
                                False)
5777

    
5778
    # Step: create new storage
5779
    self.lu.LogStep(3, steps_total, "Allocate new storage")
5780
    iv_names = self._CreateNewStorage(self.target_node)
5781

    
5782
    # Step: for each lv, detach+rename*2+attach
5783
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
5784
    for dev, old_lvs, new_lvs in iv_names.itervalues():
5785
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
5786

    
5787
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
5788
      result.Raise("Can't detach drbd from local storage on node"
5789
                   " %s for device %s" % (self.target_node, dev.iv_name))
5790
      #dev.children = []
5791
      #cfg.Update(instance)
5792

    
5793
      # ok, we created the new LVs, so now we know we have the needed
5794
      # storage; as such, we proceed on the target node to rename
5795
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5796
      # using the assumption that logical_id == physical_id (which in
5797
      # turn is the unique_id on that node)
5798

    
5799
      # FIXME(iustin): use a better name for the replaced LVs
5800
      temp_suffix = int(time.time())
5801
      ren_fn = lambda d, suff: (d.physical_id[0],
5802
                                d.physical_id[1] + "_replaced-%s" % suff)
5803

    
5804
      # Build the rename list based on what LVs exist on the node
5805
      rename_old_to_new = []
5806
      for to_ren in old_lvs:
5807
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
5808
        if not result.fail_msg and result.payload:
5809
          # device exists
5810
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
5811

    
5812
      self.lu.LogInfo("Renaming the old LVs on the target node")
5813
      result = self.rpc.call_blockdev_rename(self.target_node, rename_old_to_new)
5814
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
5815

    
5816
      # Now we rename the new LVs to the old LVs
5817
      self.lu.LogInfo("Renaming the new LVs on the target node")
5818
      rename_new_to_old = [(new, old.physical_id)
5819
                           for old, new in zip(old_lvs, new_lvs)]
5820
      result = self.rpc.call_blockdev_rename(self.target_node, rename_new_to_old)
5821
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
5822

    
5823
      for old, new in zip(old_lvs, new_lvs):
5824
        new.logical_id = old.logical_id
5825
        self.cfg.SetDiskID(new, self.target_node)
5826

    
5827
      for disk in old_lvs:
5828
        disk.logical_id = ren_fn(disk, temp_suffix)
5829
        self.cfg.SetDiskID(disk, self.target_node)
5830

    
5831
      # Now that the new lvs have the old name, we can add them to the device
5832
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
5833
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev, new_lvs)
5834
      msg = result.fail_msg
5835
      if msg:
5836
        for new_lv in new_lvs:
5837
          msg2 = self.rpc.call_blockdev_remove(self.target_node, new_lv).fail_msg
5838
          if msg2:
5839
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
5840
                               hint=("cleanup manually the unused logical"
5841
                                     "volumes"))
5842
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
5843

    
5844
      dev.children = new_lvs
5845

    
5846
      self.cfg.Update(self.instance)
5847

    
5848
    # Wait for sync
5849
    # This can fail as the old devices are degraded and _WaitForSync
5850
    # does a combined result over all disks, so we don't check its return value
5851
    self.lu.LogStep(5, steps_total, "Sync devices")
5852
    _WaitForSync(self.lu, self.instance, unlock=True)
5853

    
5854
    # Check all devices manually
5855
    self._CheckDevices(self.instance.primary_node, iv_names)
5856

    
5857
    # Step: remove old storage
5858
    self.lu.LogStep(6, steps_total, "Removing old storage")
5859
    self._RemoveOldStorage(self.target_node, iv_names)
5860

    
5861
  def _ExecDrbd8Secondary(self):
5862
    """Replace the secondary node for DRBD 8.
5863

5864
    The algorithm for replace is quite complicated:
5865
      - for all disks of the instance:
5866
        - create new LVs on the new node with same names
5867
        - shutdown the drbd device on the old secondary
5868
        - disconnect the drbd network on the primary
5869
        - create the drbd device on the new secondary
5870
        - network attach the drbd on the primary, using an artifice:
5871
          the drbd code for Attach() will connect to the network if it
5872
          finds a device which is connected to the good local disks but
5873
          not network enabled
5874
      - wait for sync across all devices
5875
      - remove all disks from the old secondary
5876

5877
    Failures are not very well handled.
5878

5879
    """
5880
    steps_total = 6
5881

    
5882
    # Step: check device activation
5883
    self.lu.LogStep(1, steps_total, "Check device existence")
5884
    self._CheckDisksExistence([self.instance.primary_node])
5885
    self._CheckVolumeGroup([self.instance.primary_node])
5886

    
5887
    # Step: check other node consistency
5888
    self.lu.LogStep(2, steps_total, "Check peer consistency")
5889
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
5890

    
5891
    # Step: create new storage
5892
    self.lu.LogStep(3, steps_total, "Allocate new storage")
5893
    for idx, dev in enumerate(self.instance.disks):
5894
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
5895
                      (self.new_node, idx))
5896
      # we pass force_create=True to force LVM creation
5897
      for new_lv in dev.children:
5898
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
5899
                        _GetInstanceInfoText(self.instance), False)
5900

    
5901
    # Step 4: drbd minors and drbd setup changes
5902
    # after this, we must manually remove the drbd minors on both the
5903
    # error and the success paths
5904
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
5905
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
5906
                                        self.instance.name)
5907
    logging.debug("Allocated minors %r" % (minors,))
5908

    
5909
    iv_names = {}
5910
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
5911
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
5912
      # create new devices on new_node; note that we create two IDs:
5913
      # one without port, so the drbd will be activated without
5914
      # networking information on the new node at this stage, and one
5915
      # with network, for the latter activation in step 4
5916
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
5917
      if self.instance.primary_node == o_node1:
5918
        p_minor = o_minor1
5919
      else:
5920
        p_minor = o_minor2
5921

    
5922
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
5924

    
5925
      iv_names[idx] = (dev, dev.children, new_net_id)
5926
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
5927
                    new_net_id)
5928
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
5929
                              logical_id=new_alone_id,
5930
                              children=dev.children,
5931
                              size=dev.size)
5932
      try:
5933
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
5934
                              _GetInstanceInfoText(self.instance), False)
5935
      except errors.GenericError:
5936
        self.cfg.ReleaseDRBDMinors(self.instance.name)
5937
        raise
5938

    
5939
    # We have new devices, shutdown the drbd on the old secondary
5940
    for idx, dev in enumerate(self.instance.disks):
5941
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
5942
      self.cfg.SetDiskID(dev, self.target_node)
5943
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
5944
      if msg:
5945
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
5946
                           "node: %s" % (idx, msg),
5947
                           hint=("Please cleanup this device manually as"
5948
                                 " soon as possible"))
5949

    
5950
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
5951
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]
5953

    
5954
    msg = result.fail_msg
5955
    if msg:
5956
      # detaches didn't succeed (unlikely)
5957
      self.cfg.ReleaseDRBDMinors(self.instance.name)
5958
      raise errors.OpExecError("Can't detach the disks from the network on"
5959
                               " old node: %s" % (msg,))
5960

    
5961
    # if we managed to detach at least one, we update all the disks of
5962
    # the instance to point to the new secondary
5963
    self.lu.LogInfo("Updating instance configuration")
5964
    for dev, _, new_logical_id in iv_names.itervalues():
5965
      dev.logical_id = new_logical_id
5966
      self.cfg.SetDiskID(dev, self.instance.primary_node)
5967

    
5968
    self.cfg.Update(self.instance)
5969

    
5970
    # and now perform the drbd attach
5971
    self.lu.LogInfo("Attaching primary drbds to new secondary"
5972
                    " (standalone => connected)")
5973
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name, False)
5976
    for to_node, to_result in result.items():
5977
      msg = to_result.fail_msg
5978
      if msg:
5979
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
5980
                           hint=("please do a gnt-instance info to see the"
5981
                                 " status of disks"))
5982

    
5983
    # Wait for sync
5984
    # This can fail as the old devices are degraded and _WaitForSync
5985
    # does a combined result over all disks, so we don't check its return value
5986
    self.lu.LogStep(5, steps_total, "Sync devices")
5987
    _WaitForSync(self.lu, self.instance, unlock=True)
5988

    
5989
    # Check all devices manually
5990
    self._CheckDevices(self.instance.primary_node, iv_names)
5991

    
5992
    # Step: remove old storage
5993
    self.lu.LogStep(6, steps_total, "Removing old storage")
5994
    self._RemoveOldStorage(self.target_node, iv_names)


class LUGrowDisk(LogicalUnit):
5998
  """Grow a disk of an instance.
5999

6000
  """
6001
  HPATH = "disk-grow"
6002
  HTYPE = constants.HTYPE_INSTANCE
6003
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
6004
  REQ_BGL = False
6005

    
6006
  def ExpandNames(self):
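    """Expand and lock the instance; node locks are declared later."""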
6007
    self._ExpandAndLockInstance()
6008
    self.needed_locks[locking.LEVEL_NODE] = []
6009
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6010

    
6011
  def DeclareLocks(self, level):
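    """Acquire the node locks for the instance's nodes."""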
6012
    if level == locking.LEVEL_NODE:
6013
      self._LockInstancesNodes()
6014

    
6015
  def BuildHooksEnv(self):
6016
    """Build hooks env.
6017

6018
    This runs on the master, the primary and all the secondaries.
6019

6020
    """
6021
    env = {
6022
      "DISK": self.op.disk,
6023
      "AMOUNT": self.op.amount,
6024
      }
6025
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6026
    nl = [
6027
      self.cfg.GetMasterNode(),
6028
      self.instance.primary_node,
6029
      ]
6030
    return env, nl, nl
6031

    
6032
  def CheckPrereq(self):
6033
    """Check prerequisites.
6034

6035
    This checks that the instance is in the cluster.
6036

6037
    """
6038
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6039
    assert instance is not None, \
6040
      "Cannot retrieve locked instance %s" % self.op.instance_name
6041
    nodenames = list(instance.all_nodes)
6042
    for node in nodenames:
6043
      _CheckNodeOnline(self, node)
6044

    
6045

    
6046
    self.instance = instance
6047

    
6048
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
6049
      raise errors.OpPrereqError("Instance's disk layout does not support"
6050
                                 " growing.")
6051

    
6052
    self.disk = instance.FindDisk(self.op.disk)
6053

    
6054
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6055
                                       instance.hypervisor)
6056
    for node in nodenames:
6057
      info = nodeinfo[node]
6058
      info.Raise("Cannot get current information from node %s" % node)
6059
      vg_free = info.payload.get('vg_free', None)
6060
      if not isinstance(vg_free, int):
6061
        raise errors.OpPrereqError("Can't compute free disk space on"
6062
                                   " node %s" % node)
6063
      if self.op.amount > vg_free:
6064
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
6065
                                   " %d MiB available, %d MiB required" %
6066
                                   (node, vg_free, self.op.amount))
6067

    
6068
  def Exec(self, feedback_fn):
6069
    """Execute disk grow.
6070

6071
    """
6072
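    # grow the disk on every node it lives on, then record the new
    # size in the configuration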
    instance = self.instance
6073
    disk = self.disk
6074
    for node in instance.all_nodes:
6075
      self.cfg.SetDiskID(disk, node)
6076
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
6077
      result.Raise("Grow request failed to node %s" % node)
6078
    disk.RecordGrow(self.op.amount)
6079
    self.cfg.Update(instance)
6080
    if self.op.wait_for_sync:
6081
      disk_abort = not _WaitForSync(self, instance)
6082
      if disk_abort:
6083
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
6084
                             " status.\nPlease check the instance.")


class LUQueryInstanceData(NoHooksLU):
6088
  """Query runtime instance data.
6089

6090
  """
6091
  _OP_REQP = ["instances", "static"]
6092
  REQ_BGL = False
6093

    
6094
  def ExpandNames(self):
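    """Compute the instance list and declare the (shared) locks needed."""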
6095
    self.needed_locks = {}
6096
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6097

    
6098
    if not isinstance(self.op.instances, list):
6099
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6100

    
6101
    if self.op.instances:
6102
      self.wanted_names = []
6103
      for name in self.op.instances:
6104
        full_name = self.cfg.ExpandInstanceName(name)
6105
        if full_name is None:
6106
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6107
        self.wanted_names.append(full_name)
6108
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6109
    else:
6110
      self.wanted_names = None
6111
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6112

    
6113
    self.needed_locks[locking.LEVEL_NODE] = []
6114
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6115

    
6116
  def DeclareLocks(self, level):
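    """Acquire the node locks for the selected instances."""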
6117
    if level == locking.LEVEL_NODE:
6118
      self._LockInstancesNodes()
6119

    
6120
  def CheckPrereq(self):
6121
    """Check prerequisites.
6122

6123
    This only checks the optional instance list against the existing names.
6124

6125
    """
6126
    if self.wanted_names is None:
6127
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6128

    
6129
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6130
                             in self.wanted_names]
6131
    return
6132

    
6133
  def _ComputeDiskStatus(self, instance, snode, dev):
6134
    """Compute block device status.
6135

6136
    """
6137
    static = self.op.static
6138
    if not static:
6139
      self.cfg.SetDiskID(dev, instance.primary_node)
6140
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
6141
      if dev_pstatus.offline:
6142
        dev_pstatus = None
6143
      else:
6144
        dev_pstatus.Raise("Can't compute disk status for %s" % instance.name)
6145
        dev_pstatus = dev_pstatus.payload
6146
    else:
6147
      dev_pstatus = None
6148

    
6149
    if dev.dev_type in constants.LDS_DRBD:
6150
      # we change the snode then (otherwise we use the one passed in)
6151
      if dev.logical_id[0] == instance.primary_node:
6152
        snode = dev.logical_id[1]
6153
      else:
6154
        snode = dev.logical_id[0]
6155

    
6156
    if snode and not static:
6157
      self.cfg.SetDiskID(dev, snode)
6158
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
6159
      if dev_sstatus.offline:
6160
        dev_sstatus = None
6161
      else:
6162
        dev_sstatus.Raise("Can't compute disk status for %s" % instance.name)
6163
        dev_sstatus = dev_sstatus.payload
6164
    else:
6165
      dev_sstatus = None
6166

    
6167
    if dev.children:
6168
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
6169
                      for child in dev.children]
6170
    else:
6171
      dev_children = []
6172

    
6173
    data = {
6174
      "iv_name": dev.iv_name,
6175
      "dev_type": dev.dev_type,
6176
      "logical_id": dev.logical_id,
6177
      "physical_id": dev.physical_id,
6178
      "pstatus": dev_pstatus,
6179
      "sstatus": dev_sstatus,
6180
      "children": dev_children,
6181
      "mode": dev.mode,
6182
      "size": dev.size,
6183
      }
6184

    
6185
    return data
6186

    
6187
  def Exec(self, feedback_fn):
6188
    """Gather and return data"""
6189
    result = {}
6190

    
6191
    cluster = self.cfg.GetClusterInfo()
6192

    
6193
    for instance in self.wanted_instances:
6194
      if not self.op.static:
6195
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6196
                                                  instance.name,
6197
                                                  instance.hypervisor)
6198
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6199
        remote_info = remote_info.payload
6200
        if remote_info and "state" in remote_info:
6201
          remote_state = "up"
6202
        else:
6203
          remote_state = "down"
6204
      else:
6205
        remote_state = None
6206
      if instance.admin_up:
6207
        config_state = "up"
6208
      else:
6209
        config_state = "down"
6210

    
6211
      disks = [self._ComputeDiskStatus(instance, None, device)
6212
               for device in instance.disks]
6213

    
6214
      idict = {
6215
        "name": instance.name,
6216
        "config_state": config_state,
6217
        "run_state": remote_state,
6218
        "pnode": instance.primary_node,
6219
        "snodes": instance.secondary_nodes,
6220
        "os": instance.os,
6221
        # this happens to be the same format used for hooks
6222
        "nics": _NICListToTuple(self, instance.nics),
6223
        "disks": disks,
6224
        "hypervisor": instance.hypervisor,
6225
        "network_port": instance.network_port,
6226
        "hv_instance": instance.hvparams,
6227
        "hv_actual": cluster.FillHV(instance),
6228
        "be_instance": instance.beparams,
6229
        "be_actual": cluster.FillBE(instance),
6230
        }
6231

    
6232
      result[instance.name] = idict
6233

    
6234
    return result


class LUSetInstanceParams(LogicalUnit):
6238
  """Modifies an instances's parameters.
6239

6240
  """
6241
  HPATH = "instance-modify"
6242
  HTYPE = constants.HTYPE_INSTANCE
6243
  _OP_REQP = ["instance_name"]
6244
  REQ_BGL = False
6245

    
6246
  def CheckArguments(self):
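    """Check and normalize the disk, nic, hvparams and beparams arguments."""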
6247
    if not hasattr(self.op, 'nics'):
6248
      self.op.nics = []
6249
    if not hasattr(self.op, 'disks'):
6250
      self.op.disks = []
6251
    if not hasattr(self.op, 'beparams'):
6252
      self.op.beparams = {}
6253
    if not hasattr(self.op, 'hvparams'):
6254
      self.op.hvparams = {}
6255
    self.op.force = getattr(self.op, "force", False)
6256
    if not (self.op.nics or self.op.disks or
6257
            self.op.hvparams or self.op.beparams):
6258
      raise errors.OpPrereqError("No changes submitted")
6259

    
6260
    # Disk validation
6261
    disk_addremove = 0
6262
    for disk_op, disk_dict in self.op.disks:
6263
      if disk_op == constants.DDM_REMOVE:
6264
        disk_addremove += 1
6265
        continue
6266
      elif disk_op == constants.DDM_ADD:
6267
        disk_addremove += 1
6268
      else:
6269
        if not isinstance(disk_op, int):
6270
          raise errors.OpPrereqError("Invalid disk index")
6271
        if not isinstance(disk_dict, dict):
6272
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
6273
          raise errors.OpPrereqError(msg)
6274

    
6275
      if disk_op == constants.DDM_ADD:
6276
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
6277
        if mode not in constants.DISK_ACCESS_SET:
6278
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
6279
        size = disk_dict.get('size', None)
6280
        if size is None:
6281
          raise errors.OpPrereqError("Required disk parameter size missing")
6282
        try:
6283
          size = int(size)
6284
        except ValueError, err:
6285
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
6286
                                     str(err))
6287
        disk_dict['size'] = size
6288
      else:
6289
        # modification of disk
6290
        if 'size' in disk_dict:
6291
          raise errors.OpPrereqError("Disk size change not possible, use"
6292
                                     " grow-disk")
6293

    
6294
    if disk_addremove > 1:
6295
      raise errors.OpPrereqError("Only one disk add or remove operation"
6296
                                 " supported at a time")
6297

    
6298
    # NIC validation
6299
    nic_addremove = 0
6300
    for nic_op, nic_dict in self.op.nics:
6301
      if nic_op == constants.DDM_REMOVE:
6302
        nic_addremove += 1
6303
        continue
6304
      elif nic_op == constants.DDM_ADD:
6305
        nic_addremove += 1
6306
      else:
6307
        if not isinstance(nic_op, int):
6308
          raise errors.OpPrereqError("Invalid nic index")
6309
        if not isinstance(nic_dict, dict):
6310
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
6311
          raise errors.OpPrereqError(msg)
6312

    
6313
      # nic_dict should be a dict
6314
      nic_ip = nic_dict.get('ip', None)
6315
      if nic_ip is not None:
6316
        if nic_ip.lower() == constants.VALUE_NONE:
6317
          nic_dict['ip'] = None
6318
        else:
6319
          if not utils.IsValidIP(nic_ip):
6320
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
6321

    
6322
      nic_bridge = nic_dict.get('bridge', None)
6323
      nic_link = nic_dict.get('link', None)
6324
      if nic_bridge and nic_link:
6325
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6326
                                   " at the same time")
6327
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
6328
        nic_dict['bridge'] = None
6329
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
6330
        nic_dict['link'] = None
6331

    
6332
      if nic_op == constants.DDM_ADD:
6333
        nic_mac = nic_dict.get('mac', None)
6334
        if nic_mac is None:
6335
          nic_dict['mac'] = constants.VALUE_AUTO
6336

    
6337
      if 'mac' in nic_dict:
6338
        nic_mac = nic_dict['mac']
6339
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6340
          if not utils.IsValidMac(nic_mac):
6341
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
6342
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
6343
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
6344
                                     " modifying an existing nic")
6345

    
6346
    if nic_addremove > 1:
6347
      raise errors.OpPrereqError("Only one NIC add or remove operation"
6348
                                 " supported at a time")
6349

    
6350
  def ExpandNames(self):
6351
    self._ExpandAndLockInstance()
6352
    self.needed_locks[locking.LEVEL_NODE] = []
6353
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6354

    
6355
  def DeclareLocks(self, level):
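    """Acquire the node-level locks for the instance being modified."""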
6356
    if level == locking.LEVEL_NODE:
6357
      self._LockInstancesNodes()
6358

    
6359
  def BuildHooksEnv(self):
6360
    """Build hooks env.
6361

6362
    This runs on the master, primary and secondaries.
6363

6364
    """
6365
    args = dict()
6366
    if constants.BE_MEMORY in self.be_new:
6367
      args['memory'] = self.be_new[constants.BE_MEMORY]
6368
    if constants.BE_VCPUS in self.be_new:
6369
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
6370
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
6371
    # information at all.
6372
    if self.op.nics:
6373
      args['nics'] = []
6374
      nic_override = dict(self.op.nics)
6375
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
6376
      for idx, nic in enumerate(self.instance.nics):
6377
        if idx in nic_override:
6378
          this_nic_override = nic_override[idx]
6379
        else:
6380
          this_nic_override = {}
6381
        if 'ip' in this_nic_override:
6382
          ip = this_nic_override['ip']
6383
        else:
6384
          ip = nic.ip
6385
        if 'mac' in this_nic_override:
6386
          mac = this_nic_override['mac']
6387
        else:
6388
          mac = nic.mac
6389
        if idx in self.nic_pnew:
6390
          nicparams = self.nic_pnew[idx]
6391
        else:
6392
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6393
        mode = nicparams[constants.NIC_MODE]
6394
        link = nicparams[constants.NIC_LINK]
6395
        args['nics'].append((ip, mac, mode, link))
6396
      if constants.DDM_ADD in nic_override:
6397
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6398
        mac = nic_override[constants.DDM_ADD]['mac']
6399
        nicparams = self.nic_pnew[constants.DDM_ADD]
6400
        mode = nicparams[constants.NIC_MODE]
6401
        link = nicparams[constants.NIC_LINK]
6402
        args['nics'].append((ip, mac, mode, link))
6403
      elif constants.DDM_REMOVE in nic_override:
6404
        del args['nics'][-1]
6405

    
6406
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6407
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6408
    return env, nl, nl
6409

    
6410
  def _GetUpdatedParams(self, old_params, update_dict,
6411
                        default_values, parameter_types):
6412
    """Return the new params dict for the given params.
6413

6414
    @type old_params: dict
6415
    @param old_params: old parameters
6416
    @type update_dict: dict
6417
    @param update_dict: dict containing new parameter values,
6418
                        or constants.VALUE_DEFAULT to reset the
6419
                        parameter to its default value
6420
    @type default_values: dict
6421
    @param default_values: default values for the filled parameters
6422
    @type parameter_types: dict
6423
    @param parameter_types: dict mapping target dict keys to types
6424
                            in constants.ENFORCEABLE_TYPES
6425
    @rtype: (dict, dict)
6426
    @return: (new_parameters, filled_parameters)
6427

6428
    """
6429
    params_copy = copy.deepcopy(old_params)
6430
    for key, val in update_dict.iteritems():
6431
      if val == constants.VALUE_DEFAULT:
6432
        try:
6433
          del params_copy[key]
6434
        except KeyError:
6435
          pass
6436
      else:
6437
        params_copy[key] = val
6438
    utils.ForceDictType(params_copy, parameter_types)
6439
    params_filled = objects.FillDict(default_values, params_copy)
6440
    return (params_copy, params_filled)
6441

    
6442
  def CheckPrereq(self):
6443
    """Check prerequisites.
6444

6445
    This only checks the instance list against the existing names.
6446

6447
    """
6448
    self.force = self.op.force
6449

    
6450
    # checking the new params on the primary/secondary nodes
6451

    
6452
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6453
    cluster = self.cluster = self.cfg.GetClusterInfo()
6454
    assert self.instance is not None, \
6455
      "Cannot retrieve locked instance %s" % self.op.instance_name
6456
    pnode = instance.primary_node
6457
    nodelist = list(instance.all_nodes)
6458

    
6459
    # hvparams processing
6460
    if self.op.hvparams:
6461
      i_hvdict, hv_new = self._GetUpdatedParams(
6462
                             instance.hvparams, self.op.hvparams,
6463
                             cluster.hvparams[instance.hypervisor],
6464
                             constants.HVS_PARAMETER_TYPES)
6465
      # local check
6466
      hypervisor.GetHypervisor(
6467
        instance.hypervisor).CheckParameterSyntax(hv_new)
6468
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6469
      self.hv_new = hv_new # the new actual values
6470
      self.hv_inst = i_hvdict # the new dict (without defaults)
6471
    else:
6472
      self.hv_new = self.hv_inst = {}
6473

    
6474
    # beparams processing
6475
    if self.op.beparams:
6476
      i_bedict, be_new = self._GetUpdatedParams(
6477
                             instance.beparams, self.op.beparams,
6478
                             cluster.beparams[constants.PP_DEFAULT],
6479
                             constants.BES_PARAMETER_TYPES)
6480
      self.be_new = be_new # the new actual values
6481
      self.be_inst = i_bedict # the new dict (without defaults)
6482
    else:
6483
      self.be_new = self.be_inst = {}
6484

    
6485
    self.warn = []
6486

    
6487
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6488
      mem_check_list = [pnode]
6489
      if be_new[constants.BE_AUTO_BALANCE]:
6490
        # either we changed auto_balance to yes or it was from before
6491
        mem_check_list.extend(instance.secondary_nodes)
6492
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6493
                                                  instance.hypervisor)
6494
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6495
                                         instance.hypervisor)
6496
      pninfo = nodeinfo[pnode]
6497
      msg = pninfo.fail_msg
6498
      if msg:
6499
        # Assume the primary node is unreachable and go ahead
6500
        self.warn.append("Can't get info from primary node %s: %s" %
6501
                         (pnode, msg))
6502
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
6503
        self.warn.append("Node data from primary node %s doesn't contain"
6504
                         " free memory information" % pnode)
6505
      elif instance_info.fail_msg:
6506
        self.warn.append("Can't get instance runtime information: %s" %
6507
                        instance_info.fail_msg)
6508
      else:
6509
        if instance_info.payload:
6510
          current_mem = int(instance_info.payload['memory'])
6511
        else:
6512
          # Assume instance not running
6513
          # (there is a slight race condition here, but it's not very probable,
6514
          # and we have no other way to check)
6515
          current_mem = 0
6516
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6517
                    pninfo.payload['memory_free'])
6518
        if miss_mem > 0:
6519
          raise errors.OpPrereqError("This change will prevent the instance"
6520
                                     " from starting, due to %d MB of memory"
6521
                                     " missing on its primary node" % miss_mem)
6522

    
6523
      if be_new[constants.BE_AUTO_BALANCE]:
6524
        for node, nres in nodeinfo.items():
6525
          if node not in instance.secondary_nodes:
6526
            continue
6527
          msg = nres.fail_msg
6528
          if msg:
6529
            self.warn.append("Can't get info from secondary node %s: %s" %
6530
                             (node, msg))
6531
          elif not isinstance(nres.payload.get('memory_free', None), int):
6532
            self.warn.append("Secondary node %s didn't return free"
6533
                             " memory information" % node)
6534
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
6535
            self.warn.append("Not enough memory to failover instance to"
6536
                             " secondary node %s" % node)
6537

    
6538
    # NIC processing
6539
    self.nic_pnew = {}
6540
    self.nic_pinst = {}
6541
    for nic_op, nic_dict in self.op.nics:
6542
      if nic_op == constants.DDM_REMOVE:
6543
        if not instance.nics:
6544
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6545
        continue
6546
      if nic_op != constants.DDM_ADD:
6547
        # an existing nic
6548
        if nic_op < 0 or nic_op >= len(instance.nics):
6549
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6550
                                     " are 0 to %d" %
6551
                                     (nic_op, len(instance.nics)))
6552
        old_nic_params = instance.nics[nic_op].nicparams
6553
        old_nic_ip = instance.nics[nic_op].ip
6554
      else:
6555
        old_nic_params = {}
6556
        old_nic_ip = None
6557

    
6558
      update_params_dict = dict([(key, nic_dict[key])
6559
                                 for key in constants.NICS_PARAMETERS
6560
                                 if key in nic_dict])
6561

    
6562
      if 'bridge' in nic_dict:
6563
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6564

    
6565
      new_nic_params, new_filled_nic_params = \
6566
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6567
                                 cluster.nicparams[constants.PP_DEFAULT],
6568
                                 constants.NICS_PARAMETER_TYPES)
6569
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6570
      self.nic_pinst[nic_op] = new_nic_params
6571
      self.nic_pnew[nic_op] = new_filled_nic_params
6572
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6573

    
6574
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6575
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6576
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
6577
        if msg:
6578
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
6579
          if self.force:
6580
            self.warn.append(msg)
6581
          else:
6582
            raise errors.OpPrereqError(msg)
6583
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6584
        if 'ip' in nic_dict:
6585
          nic_ip = nic_dict['ip']
6586
        else:
6587
          nic_ip = old_nic_ip
6588
        if nic_ip is None:
6589
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6590
                                     ' on a routed nic')
6591
      if 'mac' in nic_dict:
6592
        nic_mac = nic_dict['mac']
6593
        if nic_mac is None:
6594
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6595
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6596
          # otherwise generate the mac
6597
          nic_dict['mac'] = self.cfg.GenerateMAC()
6598
        else:
6599
          # or validate/reserve the current one
6600
          if self.cfg.IsMacInUse(nic_mac):
6601
            raise errors.OpPrereqError("MAC address %s already in use"
6602
                                       " in cluster" % nic_mac)
6603

    
6604
    # DISK processing
6605
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6606
      raise errors.OpPrereqError("Disk operations not supported for"
6607
                                 " diskless instances")
6608
    for disk_op, disk_dict in self.op.disks:
6609
      if disk_op == constants.DDM_REMOVE:
6610
        if len(instance.disks) == 1:
6611
          raise errors.OpPrereqError("Cannot remove the last disk of"
6612
                                     " an instance")
6613
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6614
        ins_l = ins_l[pnode]
6615
        msg = ins_l.fail_msg
6616
        if msg:
6617
          raise errors.OpPrereqError("Can't contact node %s: %s" %
6618
                                     (pnode, msg))
6619
        if instance.name in ins_l.payload:
6620
          raise errors.OpPrereqError("Instance is running, can't remove"
6621
                                     " disks.")
6622

    
6623
      if (disk_op == constants.DDM_ADD and
6624
          len(instance.disks) >= constants.MAX_DISKS):
6625
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6626
                                   " add more" % constants.MAX_DISKS)
6627
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6628
        # an existing disk
6629
        if disk_op < 0 or disk_op >= len(instance.disks):
6630
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6631
                                     " are 0 to %d" %
6632
                                     (disk_op, len(instance.disks)))
6633

    
6634
    return
6635

    
6636
  def Exec(self, feedback_fn):
6637
    """Modifies an instance.
6638

6639
    All parameters take effect only at the next restart of the instance.
6640

6641
    """
6642
    # Process here the warnings from CheckPrereq, as we don't have a
6643
    # feedback_fn there.
6644
    for warn in self.warn:
6645
      feedback_fn("WARNING: %s" % warn)
6646

    
6647
    result = []
6648
    instance = self.instance
6649
    cluster = self.cluster
6650
    # disk changes
6651
    for disk_op, disk_dict in self.op.disks:
6652
      if disk_op == constants.DDM_REMOVE:
6653
        # remove the last disk
6654
        device = instance.disks.pop()
6655
        device_idx = len(instance.disks)
6656
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6657
          self.cfg.SetDiskID(disk, node)
6658
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
6659
          if msg:
6660
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6661
                            " continuing anyway", device_idx, node, msg)
6662
        result.append(("disk/%d" % device_idx, "remove"))
6663
      elif disk_op == constants.DDM_ADD:
6664
        # add a new disk
6665
        if instance.disk_template == constants.DT_FILE:
6666
          file_driver, file_path = instance.disks[0].logical_id
6667
          file_path = os.path.dirname(file_path)
6668
        else:
6669
          file_driver = file_path = None
6670
        disk_idx_base = len(instance.disks)
6671
        new_disk = _GenerateDiskTemplate(self,
6672
                                         instance.disk_template,
6673
                                         instance.name, instance.primary_node,
6674
                                         instance.secondary_nodes,
6675
                                         [disk_dict],
6676
                                         file_path,
6677
                                         file_driver,
6678
                                         disk_idx_base)[0]
6679
        instance.disks.append(new_disk)
6680
        info = _GetInstanceInfoText(instance)
6681

    
6682
        logging.info("Creating volume %s for instance %s",
6683
                     new_disk.iv_name, instance.name)
6684
        # Note: this needs to be kept in sync with _CreateDisks
6685
        #HARDCODE
6686
        for node in instance.all_nodes:
6687
          f_create = node == instance.primary_node
6688
          try:
6689
            _CreateBlockDev(self, node, instance, new_disk,
6690
                            f_create, info, f_create)
6691
          except errors.OpExecError, err:
6692
            self.LogWarning("Failed to create volume %s (%s) on"
6693
                            " node %s: %s",
6694
                            new_disk.iv_name, new_disk, node, err)
6695
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6696
                       (new_disk.size, new_disk.mode)))
6697
      else:
6698
        # change a given disk
6699
        instance.disks[disk_op].mode = disk_dict['mode']
6700
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6701
    # NIC changes
6702
    for nic_op, nic_dict in self.op.nics:
6703
      if nic_op == constants.DDM_REMOVE:
6704
        # remove the last nic
6705
        del instance.nics[-1]
6706
        result.append(("nic.%d" % len(instance.nics), "remove"))
6707
      elif nic_op == constants.DDM_ADD:
6708
        # mac and bridge should be set, by now
6709
        mac = nic_dict['mac']
6710
        ip = nic_dict.get('ip', None)
6711
        nicparams = self.nic_pinst[constants.DDM_ADD]
6712
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6713
        instance.nics.append(new_nic)
6714
        result.append(("nic.%d" % (len(instance.nics) - 1),
6715
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6716
                       (new_nic.mac, new_nic.ip,
6717
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6718
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6719
                       )))
6720
      else:
6721
        for key in 'mac', 'ip':
6722
          if key in nic_dict:
6723
            setattr(instance.nics[nic_op], key, nic_dict[key])
6724
        if nic_op in self.nic_pnew:
6725
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6726
        for key, val in nic_dict.iteritems():
6727
          result.append(("nic.%s/%d" % (key, nic_op), val))
6728

    
6729
    # hvparams changes
6730
    if self.op.hvparams:
6731
      instance.hvparams = self.hv_inst
6732
      for key, val in self.op.hvparams.iteritems():
6733
        result.append(("hv/%s" % key, val))
6734

    
6735
    # beparams changes
6736
    if self.op.beparams:
6737
      instance.beparams = self.be_inst
6738
      for key, val in self.op.beparams.iteritems():
6739
        result.append(("be/%s" % key, val))
6740

    
6741
    self.cfg.Update(instance)
6742

    
6743
    return result


class LUQueryExports(NoHooksLU):
6747
  """Query the exports list
6748

6749
  """
6750
  _OP_REQP = ['nodes']
6751
  REQ_BGL = False
6752

    
6753
  def ExpandNames(self):
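    """Declare shared locks on the requested nodes (or on all nodes)."""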
6754
    self.needed_locks = {}
6755
    self.share_locks[locking.LEVEL_NODE] = 1
6756
    if not self.op.nodes:
6757
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6758
    else:
6759
      self.needed_locks[locking.LEVEL_NODE] = \
6760
        _GetWantedNodes(self, self.op.nodes)
6761

    
6762
  def CheckPrereq(self):
6763
    """Check prerequisites.
6764

6765
    """
6766
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6767

    
6768
  def Exec(self, feedback_fn):
6769
    """Compute the list of all the exported system images.
6770

6771
    @rtype: dict
6772
    @return: a dictionary with the structure node->(export-list)
6773
        where export-list is a list of the instances exported on
6774
        that node.
6775

6776
    """
6777
    rpcresult = self.rpc.call_export_list(self.nodes)
6778
    result = {}
6779
    for node in rpcresult:
6780
      if rpcresult[node].fail_msg:
6781
        result[node] = False
6782
      else:
6783
        result[node] = rpcresult[node].payload
6784

    
6785
    return result


class LUExportInstance(LogicalUnit):
6789
  """Export an instance to an image in the cluster.
6790

6791
  """
6792
  HPATH = "instance-export"
6793
  HTYPE = constants.HTYPE_INSTANCE
6794
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6795
  REQ_BGL = False
6796

    
6797
  def ExpandNames(self):
6798
    self._ExpandAndLockInstance()
6799
    # FIXME: lock only instance primary and destination node
6800
    #
6801
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
6803
    # remove it from its current node. In the future we could fix this by:
6804
    #  - making a tasklet to search (share-lock all), then create the new one,
6805
    #    then one to remove, after
6806
    #  - removing the removal operation altogether
6807
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6808

    
6809
  def DeclareLocks(self, level):
6810
    """Last minute lock declaration."""
6811
    # All nodes are locked anyway, so nothing to do here.
6812

    
6813
  def BuildHooksEnv(self):
6814
    """Build hooks env.
6815

6816
    This will run on the master, primary node and target node.
6817

6818
    """
6819
    env = {
6820
      "EXPORT_NODE": self.op.target_node,
6821
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6822
      }
6823
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6824
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6825
          self.op.target_node]
6826
    return env, nl, nl
6827

    
6828
  def CheckPrereq(self):
6829
    """Check prerequisites.
6830

6831
    This checks that the instance and node names are valid.
6832

6833
    """
6834
    instance_name = self.op.instance_name
6835
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6836
    assert self.instance is not None, \
6837
          "Cannot retrieve locked instance %s" % self.op.instance_name
6838
    _CheckNodeOnline(self, self.instance.primary_node)
6839

    
6840
    self.dst_node = self.cfg.GetNodeInfo(
6841
      self.cfg.ExpandNodeName(self.op.target_node))
6842

    
6843
    if self.dst_node is None:
6844
      # This is wrong node name, not a non-locked node
6845
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6846
    _CheckNodeOnline(self, self.dst_node.name)
6847
    _CheckNodeNotDrained(self, self.dst_node.name)
6848

    
6849
    # instance disk type verification
6850
    for disk in self.instance.disks:
6851
      if disk.dev_type == constants.LD_FILE:
6852
        raise errors.OpPrereqError("Export not supported for instances with"
6853
                                   " file-based disks")
6854

    
6855
  def Exec(self, feedback_fn):
6856
    """Export an instance to an image in the cluster.
6857

6858
    """
6859
    instance = self.instance
6860
    dst_node = self.dst_node
6861
    src_node = instance.primary_node
6862
    if self.op.shutdown:
6863
      # shutdown the instance, but not the disks
6864
      result = self.rpc.call_instance_shutdown(src_node, instance)
6865
      result.Raise("Could not shutdown instance %s on"
6866
                   " node %s" % (instance.name, src_node))
6867

    
6868
    vgname = self.cfg.GetVGName()
6869

    
6870
    snap_disks = []
6871

    
6872
    # set the disks ID correctly since call_instance_start needs the
6873
    # correct drbd minor to create the symlinks
6874
    for disk in instance.disks:
6875
      self.cfg.SetDiskID(disk, src_node)
6876

    
6877
    try:
6878
      for idx, disk in enumerate(instance.disks):
6879
        # result.payload will be a snapshot of an lvm leaf of the one we passed
6880
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
6881
        msg = result.fail_msg
6882
        if msg:
6883
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
6884
                          idx, src_node, msg)
6885
          snap_disks.append(False)
6886
        else:
6887
          disk_id = (vgname, result.payload)
6888
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6889
                                 logical_id=disk_id, physical_id=disk_id,
6890
                                 iv_name=disk.iv_name)
6891
          snap_disks.append(new_dev)
6892

    
6893
    finally:
6894
      if self.op.shutdown and instance.admin_up:
6895
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6896
        msg = result.fail_msg
6897
        if msg:
6898
          _ShutdownInstanceDisks(self, instance)
6899
          raise errors.OpExecError("Could not start instance: %s" % msg)
6900

    
6901
    # TODO: check for size
6902

    
6903
    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)

    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name
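    # constants.TAG_CLUSTER needs no per-object lock: the cluster object is
    # looked up directly in CheckPrereq, so needed_locks stays empty for it.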

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the mode-specific _ALLO_KEYS or _RELO_KEYS
      class attribute are required)
    - four buffer attributes (in_text, in_data, out_text, out_data), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
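            # If the instance currently uses less memory than its configured
            # BE_MEMORY, count the unused entitlement as reserved and take it
            # out of the node's free memory, giving the allocator a
            # conservative free-memory figure.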
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic (queried) node parameters
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result