#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf

class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this will
    be handled in the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If no nodes are to be returned, use an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instances' nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None
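
# Illustrative sketch (editor's note, not part of the original module): a
# minimal LU following the rules from the LogicalUnit docstring. The opcode
# and its "nodes" field are assumptions used only for illustration.
#
#   class LUExampleQueryNodes(NoHooksLU):
#     _OP_REQP = ["nodes"]
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       # lock only the requested nodes, in shared mode
#       self.needed_locks = {
#         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
#         }
#       self.share_locks[locking.LEVEL_NODE] = 1
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       return self.needed_locks[locking.LEVEL_NODE]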


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
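
# Illustrative sketch (editor's note, not part of the original module): a
# minimal tasklet and the way an LU would schedule it from ExpandNames. The
# names TLExampleNoop and instance_name handling below are assumptions used
# only for illustration; locking stays in the owning LU.
#
#   class TLExampleNoop(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       if self.cfg.GetInstanceInfo(self.instance_name) is None:
#         raise errors.OpPrereqError("Instance '%s' not known" %
#                                    self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do for %s" % self.instance_name)
#
# and, inside the owning LU's ExpandNames:
#
#   self.tasklets = [TLExampleNoop(self, self.op.instance_name)]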


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)

def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted

def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @type selected: list
  @param selected: the list of output fields selected by the caller

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)
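
# Illustrative sketch (editor's note): how the two helpers above are meant to
# be called from an LU while checking opcode arguments; the field names shown
# are assumptions, not taken from a specific opcode.
#
#   _CheckBooleanOpField(self.op, "use_locking")
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dtotal", "dfree"),
#                      selected=self.op.output_fields)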


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)

def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
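
# Illustrative sketch (editor's note): for a hypothetical single-NIC,
# single-disk instance the function above would produce, among others:
#
#   OP_TARGET=inst1.example.com
#   INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_STATUS=up
#   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged
#   INSTANCE_NIC0_BRIDGE=xen-br0
#   INSTANCE_DISK_COUNT=1
#   INSTANCE_DISK0_SIZE=10240
#
# (the hooks runner later prefixes each key with "GANETI_"; the instance,
# node and bridge names above are made up for illustration).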


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)

def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))

def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
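
# Illustrative worked example (editor's note): with candidate_pool_size = 10
# and GetMasterCandidateStats reporting mc_now = 3, mc_should = 3, the helper
# above computes mc_should = min(3 + 1, 10) = 4; since 3 < 4, the newly added
# node would promote itself to master candidate. The numbers are made up.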


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)

def _CheckOSVariant(os, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os: L{objects.OS}
  @param os: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant")

  if variant not in os.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant")


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty

class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True

class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master

class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)
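
  # Illustrative sketch (editor's note): with ecode = self.ENODELVM,
  # item = "node1.example.com" (a made-up node name) and
  # msg = "unable to check volume groups", _Error reports either
  #   ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  # when self.op.error_codes is set, or
  #   ERROR: node node1.example.com: unable to check volume groups
  # otherwise (both prefixed with "  - " by the feedback call above).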

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        the form of minor: (instance, must_exist), which correspond to
        instances and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if (not node == node_current):
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to take over,
      # should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

1238
  def Exec(self, feedback_fn):
1239
    """Verify integrity of cluster, performing various test on nodes.
1240

1241
    """
1242
    self.bad = False
1243
    _ErrorIf = self._ErrorIf
1244
    verbose = self.op.verbose
1245
    self._feedback_fn = feedback_fn
1246
    feedback_fn("* Verifying global settings")
1247
    for msg in self.cfg.VerifyConfig():
1248
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1249

    
1250
    vg_name = self.cfg.GetVGName()
1251
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1252
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1253
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1254
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1255
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1256
                        for iname in instancelist)
1257
    i_non_redundant = [] # Non redundant instances
1258
    i_non_a_balanced = [] # Non auto-balanced instances
1259
    n_offline = [] # List of offline nodes
1260
    n_drained = [] # List of nodes being drained
1261
    node_volume = {}
1262
    node_instance = {}
1263
    node_info = {}
1264
    instance_cfg = {}
1265

    
1266
    # FIXME: verify OS list
1267
    # do local checksums
1268
    master_files = [constants.CLUSTER_CONF_FILE]
1269

    
1270
    file_names = ssconf.SimpleStore().GetFileList()
1271
    file_names.append(constants.SSL_CERT_FILE)
1272
    file_names.append(constants.RAPI_CERT_FILE)
1273
    file_names.extend(master_files)
1274

    
1275
    local_checksums = utils.FingerprintFiles(file_names)
1276

    
1277
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1278
    node_verify_param = {
1279
      constants.NV_FILELIST: file_names,
1280
      constants.NV_NODELIST: [node.name for node in nodeinfo
1281
                              if not node.offline],
1282
      constants.NV_HYPERVISOR: hypervisors,
1283
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1284
                                  node.secondary_ip) for node in nodeinfo
1285
                                 if not node.offline],
1286
      constants.NV_INSTANCELIST: hypervisors,
1287
      constants.NV_VERSION: None,
1288
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1289
      constants.NV_NODESETUP: None,
1290
      }
1291
    if vg_name is not None:
1292
      node_verify_param[constants.NV_VGLIST] = None
1293
      node_verify_param[constants.NV_LVLIST] = vg_name
1294
      node_verify_param[constants.NV_DRBDLIST] = None
1295
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1296
                                           self.cfg.GetClusterName())
1297

    
1298
    cluster = self.cfg.GetClusterInfo()
1299
    master_node = self.cfg.GetMasterNode()
1300
    all_drbd_map = self.cfg.ComputeDRBDMap()
1301

    
1302
    feedback_fn("* Verifying node status")
1303
    for node_i in nodeinfo:
1304
      node = node_i.name
1305

    
1306
      if node_i.offline:
1307
        if verbose:
1308
          feedback_fn("* Skipping offline node %s" % (node,))
1309
        n_offline.append(node)
1310
        continue
1311

    
1312
      if node == master_node:
1313
        ntype = "master"
1314
      elif node_i.master_candidate:
1315
        ntype = "master candidate"
1316
      elif node_i.drained:
1317
        ntype = "drained"
1318
        n_drained.append(node)
1319
      else:
1320
        ntype = "regular"
1321
      if verbose:
1322
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1323

    
1324
      msg = all_nvinfo[node].fail_msg
1325
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1326
      if msg:
1327
        continue
1328

    
1329
      nresult = all_nvinfo[node].payload
1330
      node_drbd = {}
1331
      for minor, instance in all_drbd_map[node].items():
1332
        test = instance not in instanceinfo
1333
        _ErrorIf(test, self.ECLUSTERCFG, None,
1334
                 "ghost instance '%s' in temporary DRBD map", instance)
1335
          # ghost instance should not be running, but otherwise we
1336
          # don't give double warnings (both ghost instance and
1337
          # unallocated minor in use)
1338
        if test:
1339
          node_drbd[minor] = (instance, False)
1340
        else:
1341
          instance = instanceinfo[instance]
1342
          node_drbd[minor] = (instance.name, instance.admin_up)
1343
      self._VerifyNode(node_i, file_names, local_checksums,
1344
                       nresult, master_files, node_drbd, vg_name)
1345

    
1346
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1347
      if vg_name is None:
1348
        node_volume[node] = {}
1349
      elif isinstance(lvdata, basestring):
1350
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1351
                 utils.SafeEncode(lvdata))
1352
        node_volume[node] = {}
1353
      elif not isinstance(lvdata, dict):
1354
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1355
        continue
1356
      else:
1357
        node_volume[node] = lvdata
1358

    
1359
      # node_instance
1360
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1361
      test = not isinstance(idata, list)
1362
      _ErrorIf(test, self.ENODEHV, node,
1363
               "rpc call to node failed (instancelist)")
1364
      if test:
1365
        continue
1366

    
1367
      node_instance[node] = idata
1368

    
1369
      # node_info
1370
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1371
      test = not isinstance(nodeinfo, dict)
1372
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1373
      if test:
1374
        continue
1375

    
1376
      try:
1377
        node_info[node] = {
1378
          "mfree": int(nodeinfo['memory_free']),
1379
          "pinst": [],
1380
          "sinst": [],
1381
          # dictionary holding all instances this node is secondary for,
1382
          # grouped by their primary node. Each key is a cluster node, and each
1383
          # value is a list of instances which have the key as primary and the
1384
          # current node as secondary.  this is handy to calculate N+1 memory
1385
          # availability if you can only failover from a primary to its
1386
          # secondary.
1387
          "sinst-by-pnode": {},
1388
        }
1389
        # FIXME: devise a free space model for file based instances as well
1390
        if vg_name is not None:
1391
          test = (constants.NV_VGLIST not in nresult or
1392
                  vg_name not in nresult[constants.NV_VGLIST])
1393
          _ErrorIf(test, self.ENODELVM, node,
1394
                   "node didn't return data for the volume group '%s'"
1395
                   " - it is either missing or broken", vg_name)
1396
          if test:
1397
            continue
1398
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1399
      except (ValueError, KeyError):
1400
        _ErrorIf(True, self.ENODERPC, node,
1401
                 "node returned invalid nodeinfo, check lvm/hypervisor")
1402
        continue
1403

    
1404
    node_vol_should = {}
1405

    
1406
    feedback_fn("* Verifying instance status")
1407
    for instance in instancelist:
1408
      if verbose:
1409
        feedback_fn("* Verifying instance %s" % instance)
1410
      inst_config = instanceinfo[instance]
1411
      self._VerifyInstance(instance, inst_config, node_volume,
1412
                           node_instance, n_offline)
1413
      inst_nodes_offline = []
1414

    
1415
      inst_config.MapLVsByNode(node_vol_should)
1416

    
1417
      instance_cfg[instance] = inst_config
1418

    
1419
      pnode = inst_config.primary_node
1420
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
1421
               self.ENODERPC, pnode, "instance %s, connection to"
1422
               " primary node failed", instance)
1423
      if pnode in node_info:
1424
        node_info[pnode]['pinst'].append(instance)
1425

    
1426
      if pnode in n_offline:
1427
        inst_nodes_offline.append(pnode)
1428

    
1429
      # If the instance is non-redundant we cannot survive losing its primary
1430
      # node, so we are not N+1 compliant. On the other hand we have no disk
1431
      # templates with more than one secondary so that situation is not well
1432
      # supported either.
1433
      # FIXME: does not support file-backed instances
1434
      if len(inst_config.secondary_nodes) == 0:
1435
        i_non_redundant.append(instance)
1436
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
1437
               self.EINSTANCELAYOUT, instance,
1438
               "instance has multiple secondary nodes", code="WARNING")
1439

    
1440
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               ", ".join(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
1464
    self._VerifyOrphanVolumes(node_vol_should, node_volume)
1465

    
1466
    feedback_fn("* Verifying remaining instances")
1467
    self._VerifyOrphanInstances(instancelist, node_instance)
1468

    
1469
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1470
      feedback_fn("* Verifying N+1 Memory redundancy")
1471
      self._VerifyNPlusOneMemory(node_info, instance_cfg)
1472

    
1473
    feedback_fn("* Other Notes")
1474
    if i_non_redundant:
1475
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1476
                  % len(i_non_redundant))
1477

    
1478
    if i_non_a_balanced:
1479
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1480
                  % len(i_non_a_balanced))
1481

    
1482
    if n_offline:
1483
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1484

    
1485
    if n_drained:
1486
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1487

    
1488
    return not self.bad
1489

    
1490
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        show_node_header = True
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if test:
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 1

      return lu_result
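  # Illustrative note (not part of the original source): the indent_re trick
  # above prefixes every line of a hook script's output with six spaces, so
  # multi-line output stays visually nested under the node header, e.g.:
  #
  #   re.compile('^', re.M).sub('      ', "line1\nline2")
  #   == "      line1\n      line2"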


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
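    # Illustrative sketch (not from the original source), using made-up node,
    # instance and LV names: the tuple built above could come back as
    #
    #   ({"node3": "rpc failure"},               # nodes we could not query
    #    ["inst1"],                              # instances needing activate-disks
    #    {"inst2": [("node2", "xenvg/lv123")]})  # missing (node, volume) pairs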


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
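    # Illustrative sketch (not from the original source): for a DRBD8 disk of
    # 10240 MiB whose first (data) child was recorded at 10112 MiB, the method
    # above would bump the child to 10240 and return True, e.g.:
    #
    #   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=10240,
    #                       children=[data_child, meta_child])  # hypothetical
    #   self._EnsureChildSizes(drbd)  # -> True, data_child.size now 10240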

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
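    # Illustrative note (not from the original source): nodes report sizes in
    # bytes while the configuration stores MiB, hence the "size >> 20" above,
    # e.g. 10737418240 >> 20 == 10240.  A sample return value, with made-up
    # names, would be [("inst1", 0, 10240)].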


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
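# Illustrative sketch (not from the original source), with hypothetical
# objects: a DRBD8 disk whose children are logical volumes is reported as
# lvm-based because the recursion reaches an LD_LV child:
#
#   lv = objects.Disk(dev_type=constants.LD_LV, size=10240)
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=10240, children=[lv])
#   _RecursiveCheckIfLVMBased(drbd)  # -> True, via the LD_LV child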


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err))
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed")

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    whether the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist")

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus))

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
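    # Illustrative sketch (not from the original source), with made-up values:
    # per-hypervisor dicts passed in the opcode are merged over the existing
    # cluster values rather than replacing them wholesale, e.g.
    #
    #   cluster.hvparams == {"xen-pvm": {"kernel_path": "/boot/vmlinuz",
    #                                    "root_path": "/dev/sda1"}}
    #   self.op.hvparams == {"xen-pvm": {"root_path": "/dev/xvda1"}}
    #   # after the loop above:
    #   self.new_hvparams["xen-pvm"] == {"kernel_path": "/boot/vmlinuz",
    #                                    "root_path": "/dev/xvda1"}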

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member")
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" % ", ".join(invalid_hvs))
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.HMAC_CLUSTER_KEY,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False, unlock=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
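# Illustrative note (not from the original source): callers typically invoke
# this helper right after creating or activating disks, for example
# "_WaitForSync(self, instance)"; a False return means the mirrors were still
# degraded after the retries above and the caller should treat it as an error.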


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
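# Illustrative note (not from the original source): with ldisk=False the check
# answers "is the mirror as a whole healthy?", while ldisk=True narrows it to
# "is the local storage on this node usable?", which is the stricter question
# a caller would ask before relying on that node, e.g. during replace-disks.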


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param node_list: a list with the names of all nodes
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose, variants) as
        values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(valid_nodes, node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          valid = valid and osl and osl[0][1]
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
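    # Illustrative note (not from the original source), with made-up variant
    # names: an OS variant is only reported as available cluster-wide if every
    # node offers it, which is what the list comprehension above computes:
    #
    #   node1 variants: ["lenny", "squeeze"]
    #   node2 variants: ["squeeze", "wheezy"]
    #   reported "variants" value: ["squeeze"]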


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    if self.op.node_name in all_nodes:
      all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      instancelist = self.cfg.GetInstanceList()

      for instance_name in instancelist:
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output
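    # Illustrative note (not from the original source), using made-up node
    # names: for output_fields == ["name", "pinst_cnt", "role"] the result is
    # one row per node, in the same field order, e.g.
    #
    #   [["node1", 3, "M"],
    #    ["node2", 1, "C"]]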


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
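    # Illustrative note (not from the original source): the for/else above is
    # Python's "no break" idiom; the else branch only runs when no instance
    # was found owning the volume, so such LVs show up with '-' in the
    # "instance" column.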


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)

    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*dynamic_fields),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node as it's only known to the LU
    while "node" in fields:
      fields.remove("node")

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
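    # Illustrative note (not from the original source): the name field is
    # always added to the RPC request so rows can be keyed and sorted, even if
    # the caller did not ask for it, e.g. for output_fields == ["size"] the
    # node is asked for ["name", "size"] and name_idx is 0; "node" itself is
    # filled in locally and never requested from the node.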


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = node_name

    storage_type = self.op.storage_type
    if storage_type not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)))

  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.HostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before")
        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one")
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one")

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port")

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)
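    # Illustrative note (not from the original source): "single homed" here
    # means primary and secondary IP are identical.  The check above only
    # rejects a mismatch; with a dual-homed master (e.g. primary 192.0.2.10,
    # secondary 10.0.0.10, both hypothetical), a candidate node must also
    # supply a distinct secondary IP, while on a single-homed master it must
    # not.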
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node)


class LUSetNodeParams(LogicalUnit):
2976
  """Modifies the parameters of a node.
2977

2978
  """
2979
  HPATH = "node-modify"
2980
  HTYPE = constants.HTYPE_NODE
2981
  _OP_REQP = ["node_name"]
2982
  REQ_BGL = False
2983

    
2984
  def CheckArguments(self):
2985
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2986
    if node_name is None:
2987
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2988
    self.op.node_name = node_name
2989
    _CheckBooleanOpField(self.op, 'master_candidate')
2990
    _CheckBooleanOpField(self.op, 'offline')
2991
    _CheckBooleanOpField(self.op, 'drained')
2992
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2993
    if all_mods.count(None) == 3:
2994
      raise errors.OpPrereqError("Please pass at least one modification")
2995
    if all_mods.count(True) > 1:
2996
      raise errors.OpPrereqError("Can't set the node into more than one"
2997
                                 " state at the same time")
2998

    
2999
  def ExpandNames(self):
3000
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3001

    
3002
  def BuildHooksEnv(self):
3003
    """Build hooks env.
3004

3005
    This runs on the master node.
3006

3007
    """
3008
    env = {
3009
      "OP_TARGET": self.op.node_name,
3010
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3011
      "OFFLINE": str(self.op.offline),
3012
      "DRAINED": str(self.op.drained),
3013
      }
3014
    nl = [self.cfg.GetMasterNode(),
3015
          self.op.node_name]
3016
    return env, nl, nl
3017

    
3018
  def CheckPrereq(self):
3019
    """Check prerequisites.
3020

3021
    This only checks the instance list against the existing names.
3022

3023
    """
3024
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3025

    
3026
    if (self.op.master_candidate is not None or
3027
        self.op.drained is not None or
3028
        self.op.offline is not None):
3029
      # we can't change the master's node flags
3030
      if self.op.node_name == self.cfg.GetMasterNode():
3031
        raise errors.OpPrereqError("The master role can be changed"
3032
                                   " only via masterfailover")
3033

    
3034
    # Boolean value that tells us whether we're offlining or draining the node
3035
    offline_or_drain = self.op.offline == True or self.op.drained == True
3036
    deoffline_or_drain = self.op.offline == False or self.op.drained == False
3037

    
3038
    if (node.master_candidate and
3039
        (self.op.master_candidate == False or offline_or_drain)):
3040
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
3041
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
3042
      if mc_now <= cp_size:
3043
        msg = ("Not enough master candidates (desired"
3044
               " %d, new value will be %d)" % (cp_size, mc_now-1))
3045
        # Only allow forcing the operation if it's an offline/drain operation,
3046
        # and we could not possibly promote more nodes.
3047
        # FIXME: this can still lead to issues if in any way another node which
3048
        # could be promoted appears in the meantime.
3049
        if self.op.force and offline_or_drain and mc_should == mc_max:
3050
          self.LogWarning(msg)
3051
        else:
3052
          raise errors.OpPrereqError(msg)
3053

    
3054
    if (self.op.master_candidate == True and
3055
        ((node.offline and not self.op.offline == False) or
3056
         (node.drained and not self.op.drained == False))):
3057
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3058
                                 " to master_candidate" % node.name)
3059

    
3060
    # If we're being deofflined/drained, we'll MC ourself if needed
3061
    if (deoffline_or_drain and not offline_or_drain and not
3062
        self.op.master_candidate == True):
3063
      self.op.master_candidate = _DecideSelfPromotion(self)
3064
      if self.op.master_candidate:
3065
        self.LogInfo("Autopromoting node to master candidate")
3066

    
3067
    return
3068

    
3069
  def Exec(self, feedback_fn):
3070
    """Modifies a node.
3071

3072
    """
3073
    node = self.node
3074

    
3075
    result = []
3076
    changed_mc = False
3077

    
3078
    if self.op.offline is not None:
3079
      node.offline = self.op.offline
3080
      result.append(("offline", str(self.op.offline)))
3081
      if self.op.offline == True:
3082
        if node.master_candidate:
3083
          node.master_candidate = False
3084
          changed_mc = True
3085
          result.append(("master_candidate", "auto-demotion due to offline"))
3086
        if node.drained:
3087
          node.drained = False
3088
          result.append(("drained", "clear drained status due to offline"))
3089

    
3090
    if self.op.master_candidate is not None:
3091
      node.master_candidate = self.op.master_candidate
3092
      changed_mc = True
3093
      result.append(("master_candidate", str(self.op.master_candidate)))
3094
      if self.op.master_candidate == False:
3095
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3096
        msg = rrc.fail_msg
3097
        if msg:
3098
          self.LogWarning("Node failed to demote itself: %s" % msg)
3099

    
3100
    if self.op.drained is not None:
3101
      node.drained = self.op.drained
3102
      result.append(("drained", str(self.op.drained)))
3103
      if self.op.drained == True:
3104
        if node.master_candidate:
3105
          node.master_candidate = False
3106
          changed_mc = True
3107
          result.append(("master_candidate", "auto-demotion due to drain"))
3108
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3109
          msg = rrc.fail_msg
3110
          if msg:
3111
            self.LogWarning("Node failed to demote itself: %s" % msg)
3112
        if node.offline:
3113
          node.offline = False
3114
          result.append(("offline", "clear offline status due to drain"))
3115

    
3116
    # this will trigger configuration file update, if needed
3117
    self.cfg.Update(node, feedback_fn)
3118
    # this will trigger job queue propagation or cleanup
3119
    if changed_mc:
3120
      self.context.ReaddNode(node)
3121

    
3122
    return result
3123

    
3124

    
3125
class LUPowercycleNode(NoHooksLU):
3126
  """Powercycles a node.
3127

3128
  """
3129
  _OP_REQP = ["node_name", "force"]
3130
  REQ_BGL = False
3131

    
3132
  def CheckArguments(self):
3133
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
3134
    if node_name is None:
3135
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
3136
    self.op.node_name = node_name
3137
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
3138
      raise errors.OpPrereqError("The node is the master and the force"
3139
                                 " parameter was not set")
3140

    
3141
  def ExpandNames(self):
3142
    """Locking for PowercycleNode.
3143

3144
    This is a last-resort option and shouldn't block on other
3145
    jobs. Therefore, we grab no locks.
3146

3147
    """
3148
    self.needed_locks = {}
3149

    
3150
  def CheckPrereq(self):
3151
    """Check prerequisites.
3152

3153
    This LU has no prereqs.
3154

3155
    """
3156
    pass
3157

    
3158
  def Exec(self, feedback_fn):
3159
    """Reboots a node.
3160

3161
    """
3162
    result = self.rpc.call_node_powercycle(self.op.node_name,
3163
                                           self.cfg.GetHypervisorType())
3164
    result.Raise("Failed to schedule the reboot")
3165
    return result.payload
3166

    
3167

    
3168
class LUQueryClusterInfo(NoHooksLU):
3169
  """Query cluster configuration.
3170

3171
  """
3172
  _OP_REQP = []
3173
  REQ_BGL = False
3174

    
3175
  def ExpandNames(self):
3176
    self.needed_locks = {}
3177

    
3178
  def CheckPrereq(self):
3179
    """No prerequsites needed for this LU.
3180

3181
    """
3182
    pass
3183

    
3184
  def Exec(self, feedback_fn):
3185
    """Return cluster config.
3186

3187
    """
3188
    cluster = self.cfg.GetClusterInfo()
3189
    result = {
3190
      "software_version": constants.RELEASE_VERSION,
3191
      "protocol_version": constants.PROTOCOL_VERSION,
3192
      "config_version": constants.CONFIG_VERSION,
3193
      "os_api_version": max(constants.OS_API_VERSIONS),
3194
      "export_version": constants.EXPORT_VERSION,
3195
      "architecture": (platform.architecture()[0], platform.machine()),
3196
      "name": cluster.cluster_name,
3197
      "master": cluster.master_node,
3198
      "default_hypervisor": cluster.enabled_hypervisors[0],
3199
      "enabled_hypervisors": cluster.enabled_hypervisors,
3200
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3201
                        for hypervisor_name in cluster.enabled_hypervisors]),
3202
      "beparams": cluster.beparams,
3203
      "nicparams": cluster.nicparams,
3204
      "candidate_pool_size": cluster.candidate_pool_size,
3205
      "master_netdev": cluster.master_netdev,
3206
      "volume_group_name": cluster.volume_group_name,
3207
      "file_storage_dir": cluster.file_storage_dir,
3208
      "ctime": cluster.ctime,
3209
      "mtime": cluster.mtime,
3210
      "uuid": cluster.uuid,
3211
      "tags": list(cluster.GetTags()),
3212
      }
3213

    
3214
    return result
3215

    
3216

    
3217
class LUQueryConfigValues(NoHooksLU):
3218
  """Return configuration values.
3219

3220
  """
3221
  _OP_REQP = []
3222
  REQ_BGL = False
3223
  _FIELDS_DYNAMIC = utils.FieldSet()
3224
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3225
                                  "watcher_pause")
3226

    
3227
  def ExpandNames(self):
3228
    self.needed_locks = {}
3229

    
3230
    _CheckOutputFields(static=self._FIELDS_STATIC,
3231
                       dynamic=self._FIELDS_DYNAMIC,
3232
                       selected=self.op.output_fields)
3233

    
3234
  def CheckPrereq(self):
3235
    """No prerequisites.
3236

3237
    """
3238
    pass
3239

    
3240
  def Exec(self, feedback_fn):
3241
    """Dump a representation of the cluster config to the standard output.
3242

3243
    """
3244
    values = []
3245
    for field in self.op.output_fields:
3246
      if field == "cluster_name":
3247
        entry = self.cfg.GetClusterName()
3248
      elif field == "master_node":
3249
        entry = self.cfg.GetMasterNode()
3250
      elif field == "drain_flag":
3251
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3252
      elif field == "watcher_pause":
3253
        return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3254
      else:
3255
        raise errors.ParameterError(field)
3256
      values.append(entry)
3257
    return values
3258

    
3259

    
3260
class LUActivateInstanceDisks(NoHooksLU):
3261
  """Bring up an instance's disks.
3262

3263
  """
3264
  _OP_REQP = ["instance_name"]
3265
  REQ_BGL = False
3266

    
3267
  def ExpandNames(self):
3268
    self._ExpandAndLockInstance()
3269
    self.needed_locks[locking.LEVEL_NODE] = []
3270
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3271

    
3272
  def DeclareLocks(self, level):
3273
    if level == locking.LEVEL_NODE:
3274
      self._LockInstancesNodes()
3275

    
3276
  def CheckPrereq(self):
3277
    """Check prerequisites.
3278

3279
    This checks that the instance is in the cluster.
3280

3281
    """
3282
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3283
    assert self.instance is not None, \
3284
      "Cannot retrieve locked instance %s" % self.op.instance_name
3285
    _CheckNodeOnline(self, self.instance.primary_node)
3286
    if not hasattr(self.op, "ignore_size"):
3287
      self.op.ignore_size = False
3288

    
3289
  def Exec(self, feedback_fn):
3290
    """Activate the disks.
3291

3292
    """
3293
    disks_ok, disks_info = \
3294
              _AssembleInstanceDisks(self, self.instance,
3295
                                     ignore_size=self.op.ignore_size)
3296
    if not disks_ok:
3297
      raise errors.OpExecError("Cannot activate block devices")
3298

    
3299
    return disks_info
3300

    
3301

    
3302
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3303
                           ignore_size=False):
3304
  """Prepare the block devices for an instance.
3305

3306
  This sets up the block devices on all nodes.
3307

3308
  @type lu: L{LogicalUnit}
3309
  @param lu: the logical unit on whose behalf we execute
3310
  @type instance: L{objects.Instance}
3311
  @param instance: the instance for whose disks we assemble
3312
  @type ignore_secondaries: boolean
3313
  @param ignore_secondaries: if true, errors on secondary nodes
3314
      won't result in an error return from the function
3315
  @type ignore_size: boolean
3316
  @param ignore_size: if true, the current known size of the disk
3317
      will not be used during the disk activation, useful for cases
3318
      when the size is wrong
3319
  @return: False if the operation failed, otherwise a list of
3320
      (host, instance_visible_name, node_visible_name)
3321
      with the mapping from node devices to instance devices
3322

3323
  """
3324
  device_info = []
3325
  disks_ok = True
3326
  iname = instance.name
3327
  # With the two passes mechanism we try to reduce the window of
3328
  # opportunity for the race condition of switching DRBD to primary
3329
  # before handshaking occured, but we do not eliminate it
3330

    
3331
  # The proper fix would be to wait (with some limits) until the
3332
  # connection has been made and drbd transitions from WFConnection
3333
  # into any other network-connected state (Connected, SyncTarget,
3334
  # SyncSource, etc.)
3335

    
3336
  # 1st pass, assemble on all nodes in secondary mode
3337
  for inst_disk in instance.disks:
3338
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3339
      if ignore_size:
3340
        node_disk = node_disk.Copy()
3341
        node_disk.UnsetSize()
3342
      lu.cfg.SetDiskID(node_disk, node)
3343
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3344
      msg = result.fail_msg
3345
      if msg:
3346
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3347
                           " (is_primary=False, pass=1): %s",
3348
                           inst_disk.iv_name, node, msg)
3349
        if not ignore_secondaries:
3350
          disks_ok = False
3351

    
3352
  # FIXME: race condition on drbd migration to primary
3353

    
3354
  # 2nd pass, do only the primary node
3355
  for inst_disk in instance.disks:
3356
    dev_path = None
3357

    
3358
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3359
      if node != instance.primary_node:
3360
        continue
3361
      if ignore_size:
3362
        node_disk = node_disk.Copy()
3363
        node_disk.UnsetSize()
3364
      lu.cfg.SetDiskID(node_disk, node)
3365
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3366
      msg = result.fail_msg
3367
      if msg:
3368
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3369
                           " (is_primary=True, pass=2): %s",
3370
                           inst_disk.iv_name, node, msg)
3371
        disks_ok = False
3372
      else:
3373
        dev_path = result.payload
3374

    
3375
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3376

    
3377
  # leave the disks configured for the primary node
3378
  # this is a workaround that would be fixed better by
3379
  # improving the logical/physical id handling
3380
  for disk in instance.disks:
3381
    lu.cfg.SetDiskID(disk, instance.primary_node)
3382

    
3383
  return disks_ok, device_info
3384

    
3385

    
3386
def _StartInstanceDisks(lu, instance, force):
3387
  """Start the disks of an instance.
3388

3389
  """
3390
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3391
                                           ignore_secondaries=force)
3392
  if not disks_ok:
3393
    _ShutdownInstanceDisks(lu, instance)
3394
    if force is not None and not force:
3395
      lu.proc.LogWarning("", hint="If the message above refers to a"
3396
                         " secondary node,"
3397
                         " you can retry the operation using '--force'.")
3398
    raise errors.OpExecError("Disk consistency error")
3399

    
3400

    
3401
class LUDeactivateInstanceDisks(NoHooksLU):
3402
  """Shutdown an instance's disks.
3403

3404
  """
3405
  _OP_REQP = ["instance_name"]
3406
  REQ_BGL = False
3407

    
3408
  def ExpandNames(self):
3409
    self._ExpandAndLockInstance()
3410
    self.needed_locks[locking.LEVEL_NODE] = []
3411
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3412

    
3413
  def DeclareLocks(self, level):
3414
    if level == locking.LEVEL_NODE:
3415
      self._LockInstancesNodes()
3416

    
3417
  def CheckPrereq(self):
3418
    """Check prerequisites.
3419

3420
    This checks that the instance is in the cluster.
3421

3422
    """
3423
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3424
    assert self.instance is not None, \
3425
      "Cannot retrieve locked instance %s" % self.op.instance_name
3426

    
3427
  def Exec(self, feedback_fn):
3428
    """Deactivate the disks
3429

3430
    """
3431
    instance = self.instance
3432
    _SafeShutdownInstanceDisks(self, instance)
3433

    
3434

    
3435
def _SafeShutdownInstanceDisks(lu, instance):
3436
  """Shutdown block devices of an instance.
3437

3438
  This function checks if an instance is running, before calling
3439
  _ShutdownInstanceDisks.
3440

3441
  """
3442
  pnode = instance.primary_node
3443
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3444
  ins_l.Raise("Can't contact node %s" % pnode)
3445

    
3446
  if instance.name in ins_l.payload:
3447
    raise errors.OpExecError("Instance is running, can't shutdown"
3448
                             " block devices.")
3449

    
3450
  _ShutdownInstanceDisks(lu, instance)
3451

    
3452

    
3453
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3454
  """Shutdown block devices of an instance.
3455

3456
  This does the shutdown on all nodes of the instance.
3457

3458
  If the ignore_primary is false, errors on the primary node are
3459
  ignored.
3460

3461
  """
3462
  all_result = True
3463
  for disk in instance.disks:
3464
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3465
      lu.cfg.SetDiskID(top_disk, node)
3466
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3467
      msg = result.fail_msg
3468
      if msg:
3469
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3470
                      disk.iv_name, node, msg)
3471
        if not ignore_primary or node != instance.primary_node:
3472
          all_result = False
3473
  return all_result
3474

    
3475

    
3476
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3477
  """Checks if a node has enough free memory.
3478

3479
  This function check if a given node has the needed amount of free
3480
  memory. In case the node has less memory or we cannot get the
3481
  information from the node, this function raise an OpPrereqError
3482
  exception.
3483

3484
  @type lu: C{LogicalUnit}
3485
  @param lu: a logical unit from which we get configuration data
3486
  @type node: C{str}
3487
  @param node: the node to check
3488
  @type reason: C{str}
3489
  @param reason: string to use in the error message
3490
  @type requested: C{int}
3491
  @param requested: the amount of memory in MiB to check for
3492
  @type hypervisor_name: C{str}
3493
  @param hypervisor_name: the hypervisor to ask for memory stats
3494
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3495
      we cannot check the node
3496

3497
  """
3498
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3499
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3500
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3501
  if not isinstance(free_mem, int):
3502
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3503
                               " was '%s'" % (node, free_mem))
3504
  if requested > free_mem:
3505
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3506
                               " needed %s MiB, available %s MiB" %
3507
                               (node, reason, requested, free_mem))
3508

    
3509

    
3510
class LUStartupInstance(LogicalUnit):
3511
  """Starts an instance.
3512

3513
  """
3514
  HPATH = "instance-start"
3515
  HTYPE = constants.HTYPE_INSTANCE
3516
  _OP_REQP = ["instance_name", "force"]
3517
  REQ_BGL = False
3518

    
3519
  def ExpandNames(self):
3520
    self._ExpandAndLockInstance()
3521

    
3522
  def BuildHooksEnv(self):
3523
    """Build hooks env.
3524

3525
    This runs on master, primary and secondary nodes of the instance.
3526

3527
    """
3528
    env = {
3529
      "FORCE": self.op.force,
3530
      }
3531
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3532
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3533
    return env, nl, nl
3534

    
3535
  def CheckPrereq(self):
3536
    """Check prerequisites.
3537

3538
    This checks that the instance is in the cluster.
3539

3540
    """
3541
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3542
    assert self.instance is not None, \
3543
      "Cannot retrieve locked instance %s" % self.op.instance_name
3544

    
3545
    # extra beparams
3546
    self.beparams = getattr(self.op, "beparams", {})
3547
    if self.beparams:
3548
      if not isinstance(self.beparams, dict):
3549
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3550
                                   " dict" % (type(self.beparams), ))
3551
      # fill the beparams dict
3552
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3553
      self.op.beparams = self.beparams
3554

    
3555
    # extra hvparams
3556
    self.hvparams = getattr(self.op, "hvparams", {})
3557
    if self.hvparams:
3558
      if not isinstance(self.hvparams, dict):
3559
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3560
                                   " dict" % (type(self.hvparams), ))
3561

    
3562
      # check hypervisor parameter syntax (locally)
3563
      cluster = self.cfg.GetClusterInfo()
3564
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3565
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3566
                                    instance.hvparams)
3567
      filled_hvp.update(self.hvparams)
3568
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3569
      hv_type.CheckParameterSyntax(filled_hvp)
3570
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3571
      self.op.hvparams = self.hvparams
3572

    
3573
    _CheckNodeOnline(self, instance.primary_node)
3574

    
3575
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3576
    # check bridges existence
3577
    _CheckInstanceBridgesExist(self, instance)
3578

    
3579
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3580
                                              instance.name,
3581
                                              instance.hypervisor)
3582
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3583
                      prereq=True)
3584
    if not remote_info.payload: # not running already
3585
      _CheckNodeFreeMemory(self, instance.primary_node,
3586
                           "starting instance %s" % instance.name,
3587
                           bep[constants.BE_MEMORY], instance.hypervisor)
3588

    
3589
  def Exec(self, feedback_fn):
3590
    """Start the instance.
3591

3592
    """
3593
    instance = self.instance
3594
    force = self.op.force
3595

    
3596
    self.cfg.MarkInstanceUp(instance.name)
3597

    
3598
    node_current = instance.primary_node
3599

    
3600
    _StartInstanceDisks(self, instance, force)
3601

    
3602
    result = self.rpc.call_instance_start(node_current, instance,
3603
                                          self.hvparams, self.beparams)
3604
    msg = result.fail_msg
3605
    if msg:
3606
      _ShutdownInstanceDisks(self, instance)
3607
      raise errors.OpExecError("Could not start instance: %s" % msg)
3608

    
3609

    
3610
class LURebootInstance(LogicalUnit):
3611
  """Reboot an instance.
3612

3613
  """
3614
  HPATH = "instance-reboot"
3615
  HTYPE = constants.HTYPE_INSTANCE
3616
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3617
  REQ_BGL = False
3618

    
3619
  def CheckArguments(self):
3620
    """Check the arguments.
3621

3622
    """
3623
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3624
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
3625

    
3626
  def ExpandNames(self):
3627
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3628
                                   constants.INSTANCE_REBOOT_HARD,
3629
                                   constants.INSTANCE_REBOOT_FULL]:
3630
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3631
                                  (constants.INSTANCE_REBOOT_SOFT,
3632
                                   constants.INSTANCE_REBOOT_HARD,
3633
                                   constants.INSTANCE_REBOOT_FULL))
3634
    self._ExpandAndLockInstance()
3635

    
3636
  def BuildHooksEnv(self):
3637
    """Build hooks env.
3638

3639
    This runs on master, primary and secondary nodes of the instance.
3640

3641
    """
3642
    env = {
3643
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3644
      "REBOOT_TYPE": self.op.reboot_type,
3645
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3646
      }
3647
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3648
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3649
    return env, nl, nl
3650

    
3651
  def CheckPrereq(self):
3652
    """Check prerequisites.
3653

3654
    This checks that the instance is in the cluster.
3655

3656
    """
3657
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3658
    assert self.instance is not None, \
3659
      "Cannot retrieve locked instance %s" % self.op.instance_name
3660

    
3661
    _CheckNodeOnline(self, instance.primary_node)
3662

    
3663
    # check bridges existence
3664
    _CheckInstanceBridgesExist(self, instance)
3665

    
3666
  def Exec(self, feedback_fn):
3667
    """Reboot the instance.
3668

3669
    """
3670
    instance = self.instance
3671
    ignore_secondaries = self.op.ignore_secondaries
3672
    reboot_type = self.op.reboot_type
3673

    
3674
    node_current = instance.primary_node
3675

    
3676
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3677
                       constants.INSTANCE_REBOOT_HARD]:
3678
      for disk in instance.disks:
3679
        self.cfg.SetDiskID(disk, node_current)
3680
      result = self.rpc.call_instance_reboot(node_current, instance,
3681
                                             reboot_type,
3682
                                             self.shutdown_timeout)
3683
      result.Raise("Could not reboot instance")
3684
    else:
3685
      result = self.rpc.call_instance_shutdown(node_current, instance,
3686
                                               self.shutdown_timeout)
3687
      result.Raise("Could not shutdown instance for full reboot")
3688
      _ShutdownInstanceDisks(self, instance)
3689
      _StartInstanceDisks(self, instance, ignore_secondaries)
3690
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3691
      msg = result.fail_msg
3692
      if msg:
3693
        _ShutdownInstanceDisks(self, instance)
3694
        raise errors.OpExecError("Could not start instance for"
3695
                                 " full reboot: %s" % msg)
3696

    
3697
    self.cfg.MarkInstanceUp(instance.name)
3698

    
3699

    
3700
class LUShutdownInstance(LogicalUnit):
3701
  """Shutdown an instance.
3702

3703
  """
3704
  HPATH = "instance-stop"
3705
  HTYPE = constants.HTYPE_INSTANCE
3706
  _OP_REQP = ["instance_name"]
3707
  REQ_BGL = False
3708

    
3709
  def CheckArguments(self):
3710
    """Check the arguments.
3711

3712
    """
3713
    self.timeout = getattr(self.op, "timeout",
3714
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
3715

    
3716
  def ExpandNames(self):
3717
    self._ExpandAndLockInstance()
3718

    
3719
  def BuildHooksEnv(self):
3720
    """Build hooks env.
3721

3722
    This runs on master, primary and secondary nodes of the instance.
3723

3724
    """
3725
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3726
    env["TIMEOUT"] = self.timeout
3727
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3728
    return env, nl, nl
3729

    
3730
  def CheckPrereq(self):
3731
    """Check prerequisites.
3732

3733
    This checks that the instance is in the cluster.
3734

3735
    """
3736
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3737
    assert self.instance is not None, \
3738
      "Cannot retrieve locked instance %s" % self.op.instance_name
3739
    _CheckNodeOnline(self, self.instance.primary_node)
3740

    
3741
  def Exec(self, feedback_fn):
3742
    """Shutdown the instance.
3743

3744
    """
3745
    instance = self.instance
3746
    node_current = instance.primary_node
3747
    timeout = self.timeout
3748
    self.cfg.MarkInstanceDown(instance.name)
3749
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
3750
    msg = result.fail_msg
3751
    if msg:
3752
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3753

    
3754
    _ShutdownInstanceDisks(self, instance)
3755

    
3756

    
3757
class LUReinstallInstance(LogicalUnit):
3758
  """Reinstall an instance.
3759

3760
  """
3761
  HPATH = "instance-reinstall"
3762
  HTYPE = constants.HTYPE_INSTANCE
3763
  _OP_REQP = ["instance_name"]
3764
  REQ_BGL = False
3765

    
3766
  def ExpandNames(self):
3767
    self._ExpandAndLockInstance()
3768

    
3769
  def BuildHooksEnv(self):
3770
    """Build hooks env.
3771

3772
    This runs on master, primary and secondary nodes of the instance.
3773

3774
    """
3775
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3776
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3777
    return env, nl, nl
3778

    
3779
  def CheckPrereq(self):
3780
    """Check prerequisites.
3781

3782
    This checks that the instance is in the cluster and is not running.
3783

3784
    """
3785
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3786
    assert instance is not None, \
3787
      "Cannot retrieve locked instance %s" % self.op.instance_name
3788
    _CheckNodeOnline(self, instance.primary_node)
3789

    
3790
    if instance.disk_template == constants.DT_DISKLESS:
3791
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3792
                                 self.op.instance_name)
3793
    if instance.admin_up:
3794
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3795
                                 self.op.instance_name)
3796
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3797
                                              instance.name,
3798
                                              instance.hypervisor)
3799
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3800
                      prereq=True)
3801
    if remote_info.payload:
3802
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3803
                                 (self.op.instance_name,
3804
                                  instance.primary_node))
3805

    
3806
    self.op.os_type = getattr(self.op, "os_type", None)
3807
    self.op.force_variant = getattr(self.op, "force_variant", False)
3808
    if self.op.os_type is not None:
3809
      # OS verification
3810
      pnode = self.cfg.GetNodeInfo(
3811
        self.cfg.ExpandNodeName(instance.primary_node))
3812
      if pnode is None:
3813
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3814
                                   self.op.pnode)
3815
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3816
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3817
                   (self.op.os_type, pnode.name), prereq=True)
3818
      if not self.op.force_variant:
3819
        _CheckOSVariant(result.payload, self.op.os_type)
3820

    
3821
    self.instance = instance
3822

    
3823
  def Exec(self, feedback_fn):
3824
    """Reinstall the instance.
3825

3826
    """
3827
    inst = self.instance
3828

    
3829
    if self.op.os_type is not None:
3830
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3831
      inst.os = self.op.os_type
3832
      self.cfg.Update(inst, feedback_fn)
3833

    
3834
    _StartInstanceDisks(self, inst, None)
3835
    try:
3836
      feedback_fn("Running the instance OS create scripts...")
3837
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3838
      result.Raise("Could not install OS for instance %s on node %s" %
3839
                   (inst.name, inst.primary_node))
3840
    finally:
3841
      _ShutdownInstanceDisks(self, inst)
3842

    
3843

    
3844
class LURecreateInstanceDisks(LogicalUnit):
3845
  """Recreate an instance's missing disks.
3846

3847
  """
3848
  HPATH = "instance-recreate-disks"
3849
  HTYPE = constants.HTYPE_INSTANCE
3850
  _OP_REQP = ["instance_name", "disks"]
3851
  REQ_BGL = False
3852

    
3853
  def CheckArguments(self):
3854
    """Check the arguments.
3855

3856
    """
3857
    if not isinstance(self.op.disks, list):
3858
      raise errors.OpPrereqError("Invalid disks parameter")
3859
    for item in self.op.disks:
3860
      if (not isinstance(item, int) or
3861
          item < 0):
3862
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3863
                                   str(item))
3864

    
3865
  def ExpandNames(self):
3866
    self._ExpandAndLockInstance()
3867

    
3868
  def BuildHooksEnv(self):
3869
    """Build hooks env.
3870

3871
    This runs on master, primary and secondary nodes of the instance.
3872

3873
    """
3874
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3875
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3876
    return env, nl, nl
3877

    
3878
  def CheckPrereq(self):
3879
    """Check prerequisites.
3880

3881
    This checks that the instance is in the cluster and is not running.
3882

3883
    """
3884
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3885
    assert instance is not None, \
3886
      "Cannot retrieve locked instance %s" % self.op.instance_name
3887
    _CheckNodeOnline(self, instance.primary_node)
3888

    
3889
    if instance.disk_template == constants.DT_DISKLESS:
3890
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3891
                                 self.op.instance_name)
3892
    if instance.admin_up:
3893
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3894
                                 self.op.instance_name)
3895
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3896
                                              instance.name,
3897
                                              instance.hypervisor)
3898
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3899
                      prereq=True)
3900
    if remote_info.payload:
3901
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3902
                                 (self.op.instance_name,
3903
                                  instance.primary_node))
3904

    
3905
    if not self.op.disks:
3906
      self.op.disks = range(len(instance.disks))
3907
    else:
3908
      for idx in self.op.disks:
3909
        if idx >= len(instance.disks):
3910
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
3911

    
3912
    self.instance = instance
3913

    
3914
  def Exec(self, feedback_fn):
3915
    """Recreate the disks.
3916

3917
    """
3918
    to_skip = []
3919
    for idx, disk in enumerate(self.instance.disks):
3920
      if idx not in self.op.disks: # disk idx has not been passed in
3921
        to_skip.append(idx)
3922
        continue
3923

    
3924
    _CreateDisks(self, self.instance, to_skip=to_skip)
3925

    
3926

    
3927
class LURenameInstance(LogicalUnit):
3928
  """Rename an instance.
3929

3930
  """
3931
  HPATH = "instance-rename"
3932
  HTYPE = constants.HTYPE_INSTANCE
3933
  _OP_REQP = ["instance_name", "new_name"]
3934

    
3935
  def BuildHooksEnv(self):
3936
    """Build hooks env.
3937

3938
    This runs on master, primary and secondary nodes of the instance.
3939

3940
    """
3941
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3942
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3943
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3944
    return env, nl, nl
3945

    
3946
  def CheckPrereq(self):
3947
    """Check prerequisites.
3948

3949
    This checks that the instance is in the cluster and is not running.
3950

3951
    """
3952
    instance = self.cfg.GetInstanceInfo(
3953
      self.cfg.ExpandInstanceName(self.op.instance_name))
3954
    if instance is None:
3955
      raise errors.OpPrereqError("Instance '%s' not known" %
3956
                                 self.op.instance_name)
3957
    _CheckNodeOnline(self, instance.primary_node)
3958

    
3959
    if instance.admin_up:
3960
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3961
                                 self.op.instance_name)
3962
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3963
                                              instance.name,
3964
                                              instance.hypervisor)
3965
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3966
                      prereq=True)
3967
    if remote_info.payload:
3968
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3969
                                 (self.op.instance_name,
3970
                                  instance.primary_node))
3971
    self.instance = instance
3972

    
3973
    # new name verification
3974
    name_info = utils.HostInfo(self.op.new_name)
3975

    
3976
    self.op.new_name = new_name = name_info.name
3977
    instance_list = self.cfg.GetInstanceList()
3978
    if new_name in instance_list:
3979
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3980
                                 new_name)
3981

    
3982
    if not getattr(self.op, "ignore_ip", False):
3983
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3984
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3985
                                   (name_info.ip, new_name))
3986

    
3987

    
3988
  def Exec(self, feedback_fn):
3989
    """Reinstall the instance.
3990

3991
    """
3992
    inst = self.instance
3993
    old_name = inst.name
3994

    
3995
    if inst.disk_template == constants.DT_FILE:
3996
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3997

    
3998
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3999
    # Change the instance lock. This is definitely safe while we hold the BGL
4000
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4001
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4002

    
4003
    # re-read the instance from the configuration after rename
4004
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4005

    
4006
    if inst.disk_template == constants.DT_FILE:
4007
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4008
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4009
                                                     old_file_storage_dir,
4010
                                                     new_file_storage_dir)
4011
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4012
                   " (but the instance has been renamed in Ganeti)" %
4013
                   (inst.primary_node, old_file_storage_dir,
4014
                    new_file_storage_dir))
4015

    
4016
    _StartInstanceDisks(self, inst, None)
4017
    try:
4018
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4019
                                                 old_name)
4020
      msg = result.fail_msg
4021
      if msg:
4022
        msg = ("Could not run OS rename script for instance %s on node %s"
4023
               " (but the instance has been renamed in Ganeti): %s" %
4024
               (inst.name, inst.primary_node, msg))
4025
        self.proc.LogWarning(msg)
4026
    finally:
4027
      _ShutdownInstanceDisks(self, inst)
4028

    
4029

    
4030
class LURemoveInstance(LogicalUnit):
4031
  """Remove an instance.
4032

4033
  """
4034
  HPATH = "instance-remove"
4035
  HTYPE = constants.HTYPE_INSTANCE
4036
  _OP_REQP = ["instance_name", "ignore_failures"]
4037
  REQ_BGL = False
4038

    
4039
  def CheckArguments(self):
4040
    """Check the arguments.
4041

4042
    """
4043
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4044
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4045

    
4046
  def ExpandNames(self):
4047
    self._ExpandAndLockInstance()
4048
    self.needed_locks[locking.LEVEL_NODE] = []
4049
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4050

    
4051
  def DeclareLocks(self, level):
4052
    if level == locking.LEVEL_NODE:
4053
      self._LockInstancesNodes()
4054

    
4055
  def BuildHooksEnv(self):
4056
    """Build hooks env.
4057

4058
    This runs on master, primary and secondary nodes of the instance.
4059

4060
    """
4061
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4062
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4063
    nl = [self.cfg.GetMasterNode()]
4064
    return env, nl, nl
4065

    
4066
  def CheckPrereq(self):
4067
    """Check prerequisites.
4068

4069
    This checks that the instance is in the cluster.
4070

4071
    """
4072
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4073
    assert self.instance is not None, \
4074
      "Cannot retrieve locked instance %s" % self.op.instance_name
4075

    
4076
  def Exec(self, feedback_fn):
4077
    """Remove the instance.
4078

4079
    """
4080
    instance = self.instance
4081
    logging.info("Shutting down instance %s on node %s",
4082
                 instance.name, instance.primary_node)
4083

    
4084
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4085
                                             self.shutdown_timeout)
4086
    msg = result.fail_msg
4087
    if msg:
4088
      if self.op.ignore_failures:
4089
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4090
      else:
4091
        raise errors.OpExecError("Could not shutdown instance %s on"
4092
                                 " node %s: %s" %
4093
                                 (instance.name, instance.primary_node, msg))
4094

    
4095
    logging.info("Removing block devices for instance %s", instance.name)
4096

    
4097
    if not _RemoveDisks(self, instance):
4098
      if self.op.ignore_failures:
4099
        feedback_fn("Warning: can't remove instance's disks")
4100
      else:
4101
        raise errors.OpExecError("Can't remove instance's disks")
4102

    
4103
    logging.info("Removing instance %s out of cluster config", instance.name)
4104

    
4105
    self.cfg.RemoveInstance(instance.name)
4106
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4107

    
4108

    
4109
class LUQueryInstances(NoHooksLU):
4110
  """Logical unit for querying instances.
4111

4112
  """
4113
  _OP_REQP = ["output_fields", "names", "use_locking"]
4114
  REQ_BGL = False
4115
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4116
                    "serial_no", "ctime", "mtime", "uuid"]
4117
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4118
                                    "admin_state",
4119
                                    "disk_template", "ip", "mac", "bridge",
4120
                                    "nic_mode", "nic_link",
4121
                                    "sda_size", "sdb_size", "vcpus", "tags",
4122
                                    "network_port", "beparams",
4123
                                    r"(disk)\.(size)/([0-9]+)",
4124
                                    r"(disk)\.(sizes)", "disk_usage",
4125
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4126
                                    r"(nic)\.(bridge)/([0-9]+)",
4127
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4128
                                    r"(disk|nic)\.(count)",
4129
                                    "hvparams",
4130
                                    ] + _SIMPLE_FIELDS +
4131
                                  ["hv/%s" % name
4132
                                   for name in constants.HVS_PARAMETERS] +
4133
                                  ["be/%s" % name
4134
                                   for name in constants.BES_PARAMETERS])
4135
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4136

    
4137

    
4138
  def ExpandNames(self):
4139
    _CheckOutputFields(static=self._FIELDS_STATIC,
4140
                       dynamic=self._FIELDS_DYNAMIC,
4141
                       selected=self.op.output_fields)
4142

    
4143
    self.needed_locks = {}
4144
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4145
    self.share_locks[locking.LEVEL_NODE] = 1
4146

    
4147
    if self.op.names:
4148
      self.wanted = _GetWantedInstances(self, self.op.names)
4149
    else:
4150
      self.wanted = locking.ALL_SET
4151

    
4152
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4153
    self.do_locking = self.do_node_query and self.op.use_locking
4154
    if self.do_locking:
4155
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4156
      self.needed_locks[locking.LEVEL_NODE] = []
4157
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4158

    
4159
  def DeclareLocks(self, level):
4160
    if level == locking.LEVEL_NODE and self.do_locking:
4161
      self._LockInstancesNodes()
4162

    
4163
  def CheckPrereq(self):
4164
    """Check prerequisites.
4165

4166
    """
4167
    pass
4168

    
4169
  def Exec(self, feedback_fn):
4170
    """Computes the list of nodes and their attributes.
4171

4172
    """
4173
    all_info = self.cfg.GetAllInstancesInfo()
4174
    if self.wanted == locking.ALL_SET:
4175
      # caller didn't specify instance names, so ordering is not important
4176
      if self.do_locking:
4177
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4178
      else:
4179
        instance_names = all_info.keys()
4180
      instance_names = utils.NiceSort(instance_names)
4181
    else:
4182
      # caller did specify names, so we must keep the ordering
4183
      if self.do_locking:
4184
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4185
      else:
4186
        tgt_set = all_info.keys()
4187
      missing = set(self.wanted).difference(tgt_set)
4188
      if missing:
4189
        raise errors.OpExecError("Some instances were removed before"
4190
                                 " retrieving their data: %s" % missing)
4191
      instance_names = self.wanted
4192

    
4193
    instance_list = [all_info[iname] for iname in instance_names]
4194

    
4195
    # begin data gathering
4196

    
4197
    nodes = frozenset([inst.primary_node for inst in instance_list])
4198
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4199

    
4200
    bad_nodes = []
4201
    off_nodes = []
4202
    if self.do_node_query:
4203
      live_data = {}
4204
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4205
      for name in nodes:
4206
        result = node_data[name]
4207
        if result.offline:
4208
          # offline nodes will be in both lists
4209
          off_nodes.append(name)
4210
        if result.fail_msg:
4211
          bad_nodes.append(name)
4212
        else:
4213
          if result.payload:
4214
            live_data.update(result.payload)
4215
          # else no instance is alive
4216
    else:
4217
      live_data = dict([(name, {}) for name in instance_names])
4218

    
4219
    # end data gathering
4220

    
4221
    HVPREFIX = "hv/"
4222
    BEPREFIX = "be/"
4223
    output = []
4224
    cluster = self.cfg.GetClusterInfo()
4225
    for instance in instance_list:
4226
      iout = []
4227
      i_hv = cluster.FillHV(instance)
4228
      i_be = cluster.FillBE(instance)
4229
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4230
                                 nic.nicparams) for nic in instance.nics]
4231
      for field in self.op.output_fields:
4232
        st_match = self._FIELDS_STATIC.Matches(field)
4233
        if field in self._SIMPLE_FIELDS:
4234
          val = getattr(instance, field)
4235
        elif field == "pnode":
4236
          val = instance.primary_node
4237
        elif field == "snodes":
4238
          val = list(instance.secondary_nodes)
4239
        elif field == "admin_state":
4240
          val = instance.admin_up
4241
        elif field == "oper_state":
4242
          if instance.primary_node in bad_nodes:
4243
            val = None
4244
          else:
4245
            val = bool(live_data.get(instance.name))
4246
        elif field == "status":
4247
          if instance.primary_node in off_nodes:
4248
            val = "ERROR_nodeoffline"
4249
          elif instance.primary_node in bad_nodes:
4250
            val = "ERROR_nodedown"
4251
          else:
4252
            running = bool(live_data.get(instance.name))
4253
            if running:
4254
              if instance.admin_up:
4255
                val = "running"
4256
              else:
4257
                val = "ERROR_up"
4258
            else:
4259
              if instance.admin_up:
4260
                val = "ERROR_down"
4261
              else:
4262
                val = "ADMIN_down"
4263
        elif field == "oper_ram":
4264
          if instance.primary_node in bad_nodes:
4265
            val = None
4266
          elif instance.name in live_data:
4267
            val = live_data[instance.name].get("memory", "?")
4268
          else:
4269
            val = "-"
4270
        elif field == "vcpus":
4271
          val = i_be[constants.BE_VCPUS]
4272
        elif field == "disk_template":
4273
          val = instance.disk_template
4274
        elif field == "ip":
4275
          if instance.nics:
4276
            val = instance.nics[0].ip
4277
          else:
4278
            val = None
4279
        elif field == "nic_mode":
4280
          if instance.nics:
4281
            val = i_nicp[0][constants.NIC_MODE]
4282
          else:
4283
            val = None
4284
        elif field == "nic_link":
4285
          if instance.nics:
4286
            val = i_nicp[0][constants.NIC_LINK]
4287
          else:
4288
            val = None
4289
        elif field == "bridge":
4290
          if (instance.nics and
4291
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4292
            val = i_nicp[0][constants.NIC_LINK]
4293
          else:
4294
            val = None
4295
        elif field == "mac":
4296
          if instance.nics:
4297
            val = instance.nics[0].mac
4298
          else:
4299
            val = None
4300
        elif field == "sda_size" or field == "sdb_size":
4301
          idx = ord(field[2]) - ord('a')
4302
          try:
4303
            val = instance.FindDisk(idx).size
4304
          except errors.OpPrereqError:
4305
            val = None
4306
        elif field == "disk_usage": # total disk usage per node
4307
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4308
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4309
        elif field == "tags":
4310
          val = list(instance.GetTags())
4311
        elif field == "hvparams":
4312
          val = i_hv
4313
        elif (field.startswith(HVPREFIX) and
4314
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4315
          val = i_hv.get(field[len(HVPREFIX):], None)
4316
        elif field == "beparams":
4317
          val = i_be
4318
        elif (field.startswith(BEPREFIX) and
4319
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4320
          val = i_be.get(field[len(BEPREFIX):], None)
4321
        elif st_match and st_match.groups():
4322
          # matches a variable list
4323
          st_groups = st_match.groups()
4324
          if st_groups and st_groups[0] == "disk":
4325
            if st_groups[1] == "count":
4326
              val = len(instance.disks)
4327
            elif st_groups[1] == "sizes":
4328
              val = [disk.size for disk in instance.disks]
4329
            elif st_groups[1] == "size":
4330
              try:
4331
                val = instance.FindDisk(st_groups[2]).size
4332
              except errors.OpPrereqError:
4333
                val = None
4334
            else:
4335
              assert False, "Unhandled disk parameter"
4336
          elif st_groups[0] == "nic":
4337
            if st_groups[1] == "count":
4338
              val = len(instance.nics)
4339
            elif st_groups[1] == "macs":
4340
              val = [nic.mac for nic in instance.nics]
4341
            elif st_groups[1] == "ips":
4342
              val = [nic.ip for nic in instance.nics]
4343
            elif st_groups[1] == "modes":
4344
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4345
            elif st_groups[1] == "links":
4346
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4347
            elif st_groups[1] == "bridges":
4348
              val = []
4349
              for nicp in i_nicp:
4350
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4351
                  val.append(nicp[constants.NIC_LINK])
4352
                else:
4353
                  val.append(None)
4354
            else:
4355
              # index-based item
4356
              nic_idx = int(st_groups[2])
4357
              if nic_idx >= len(instance.nics):
4358
                val = None
4359
              else:
4360
                if st_groups[1] == "mac":
4361
                  val = instance.nics[nic_idx].mac
4362
                elif st_groups[1] == "ip":
4363
                  val = instance.nics[nic_idx].ip
4364
                elif st_groups[1] == "mode":
4365
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4366
                elif st_groups[1] == "link":
4367
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4368
                elif st_groups[1] == "bridge":
4369
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4370
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4371
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4372
                  else:
4373
                    val = None
4374
                else:
4375
                  assert False, "Unhandled NIC parameter"
4376
          else:
4377
            assert False, ("Declared but unhandled variable parameter '%s'" %
4378
                           field)
4379
        else:
4380
          assert False, "Declared but unhandled parameter '%s'" % field
4381
        iout.append(val)
4382
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
4397
    """Check the arguments.
4398

4399
    """
4400
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4401
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4402

    
4403
  def ExpandNames(self):
4404
    self._ExpandAndLockInstance()
4405
    self.needed_locks[locking.LEVEL_NODE] = []
4406
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4407

    
4408
  def DeclareLocks(self, level):
4409
    if level == locking.LEVEL_NODE:
4410
      self._LockInstancesNodes()
4411

    
4412
  def BuildHooksEnv(self):
4413
    """Build hooks env.
4414

4415
    This runs on master, primary and secondary nodes of the instance.
4416

4417
    """
4418
    env = {
4419
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4420
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4421
      }
4422
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4423
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4424
    return env, nl, nl
4425

    
4426
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
4462
    """Failover an instance.
4463

4464
    The failover is done by shutting it down on its present node and
4465
    starting it on the secondary.
4466

4467
    """
4468
    instance = self.instance
4469

    
4470
    source_node = instance.primary_node
4471
    target_node = instance.secondary_nodes[0]
4472

    
4473
    feedback_fn("* checking disk consistency between source and target")
4474
    for dev in instance.disks:
4475
      # for drbd, these are drbd over lvm
4476
      if not _CheckDiskConsistency(self, dev, target_node, False):
4477
        if instance.admin_up and not self.op.ignore_consistency:
4478
          raise errors.OpExecError("Disk %s is degraded on target node,"
4479
                                   " aborting failover." % dev.iv_name)
4480

    
4481
    feedback_fn("* shutting down instance on source node")
4482
    logging.info("Shutting down instance %s on node %s",
4483
                 instance.name, source_node)
4484

    
4485
    result = self.rpc.call_instance_shutdown(source_node, instance,
4486
                                             self.shutdown_timeout)
4487
    msg = result.fail_msg
4488
    if msg:
4489
      if self.op.ignore_consistency:
4490
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4491
                             " Proceeding anyway. Please make sure node"
4492
                             " %s is down. Error details: %s",
4493
                             instance.name, source_node, source_node, msg)
4494
      else:
4495
        raise errors.OpExecError("Could not shutdown instance %s on"
4496
                                 " node %s: %s" %
4497
                                 (instance.name, source_node, msg))
4498

    
4499
    feedback_fn("* deactivating the instance's disks on source node")
4500
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4501
      raise errors.OpExecError("Can't shut down the instance's disks.")
4502

    
4503
    instance.primary_node = target_node
4504
    # distribute new instance config to the other nodes
4505
    self.cfg.Update(instance, feedback_fn)
4506

    
4507
    # Only start the instance if it's marked as up
4508
    if instance.admin_up:
4509
      feedback_fn("* activating the instance's disks on target node")
4510
      logging.info("Starting instance %s on node %s",
4511
                   instance.name, target_node)
4512

    
4513
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4514
                                           ignore_secondaries=True)
4515
      if not disks_ok:
4516
        _ShutdownInstanceDisks(self, instance)
4517
        raise errors.OpExecError("Can't activate the instance's disks")
4518

    
4519
      feedback_fn("* starting the instance on the target node")
4520
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4521
      msg = result.fail_msg
4522
      if msg:
4523
        _ShutdownInstanceDisks(self, instance)
4524
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4525
                                 (instance.name, target_node, msg))
4526

    
4527

    
4528
class LUMigrateInstance(LogicalUnit):
4529
  """Migrate an instance.
4530

4531
  This is migration without shutting down, compared to the failover,
4532
  which is done with shutdown.
4533

4534
  """
4535
  HPATH = "instance-migrate"
4536
  HTYPE = constants.HTYPE_INSTANCE
4537
  _OP_REQP = ["instance_name", "live", "cleanup"]
4538

    
4539
  REQ_BGL = False
4540

    
4541
  def ExpandNames(self):
4542
    self._ExpandAndLockInstance()
4543

    
4544
    self.needed_locks[locking.LEVEL_NODE] = []
4545
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4546

    
4547
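    # the actual migration logic lives in a tasklet; this LU only handles
    # locking and hooks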
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4548
                                       self.op.live, self.op.cleanup)
4549
    self.tasklets = [self._migrater]
4550

    
4551
  def DeclareLocks(self, level):
4552
    if level == locking.LEVEL_NODE:
4553
      self._LockInstancesNodes()
4554

    
4555
  def BuildHooksEnv(self):
4556
    """Build hooks env.
4557

4558
    This runs on master, primary and secondary nodes of the instance.
4559

4560
    """
4561
    instance = self._migrater.instance
4562
    env = _BuildInstanceHookEnvByObject(self, instance)
4563
    env["MIGRATE_LIVE"] = self.op.live
4564
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4565
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4566
    return env, nl, nl


class LUMoveInstance(LogicalUnit):
4570
  """Move an instance by data-copying.
4571

4572
  """
4573
  HPATH = "instance-move"
4574
  HTYPE = constants.HTYPE_INSTANCE
4575
  _OP_REQP = ["instance_name", "target_node"]
4576
  REQ_BGL = False
4577

    
4578
  def CheckArguments(self):
4579
    """Check the arguments.
4580

4581
    """
4582
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4583
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4584

    
4585
  def ExpandNames(self):
4586
    self._ExpandAndLockInstance()
4587
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
4588
    if target_node is None:
4589
      raise errors.OpPrereqError("Node '%s' not known" %
4590
                                  self.op.target_node)
4591
    self.op.target_node = target_node
4592
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4593
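    # the instance's primary node is appended later by _LockInstancesNodes,
    # hence LOCKS_APPEND instead of LOCKS_REPLACE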
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4594

    
4595
  def DeclareLocks(self, level):
4596
    if level == locking.LEVEL_NODE:
4597
      self._LockInstancesNodes(primary_only=True)
4598

    
4599
  def BuildHooksEnv(self):
4600
    """Build hooks env.
4601

4602
    This runs on master, primary and secondary nodes of the instance.
4603

4604
    """
4605
    env = {
4606
      "TARGET_NODE": self.op.target_node,
4607
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4608
      }
4609
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4610
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4611
                                       self.op.target_node]
4612
    return env, nl, nl
4613

    
4614
  def CheckPrereq(self):
4615
    """Check prerequisites.
4616

4617
    This checks that the instance is in the cluster.
4618

4619
    """
4620
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4621
    assert self.instance is not None, \
4622
      "Cannot retrieve locked instance %s" % self.op.instance_name
4623

    
4624
    node = self.cfg.GetNodeInfo(self.op.target_node)
4625
    assert node is not None, \
4626
      "Cannot retrieve locked node %s" % self.op.target_node
4627

    
4628
    self.target_node = target_node = node.name
4629

    
4630
    if target_node == instance.primary_node:
4631
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4632
                                 (instance.name, target_node))
4633

    
4634
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4635

    
4636
    for idx, dsk in enumerate(instance.disks):
4637
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4638
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4639
                                   " cannot copy")
4640

    
4641
    _CheckNodeOnline(self, target_node)
4642
    _CheckNodeNotDrained(self, target_node)
4643

    
4644
    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
4657
    """Move an instance.
4658

4659
    The move is done by shutting it down on its present node, copying
4660
    the data over (slow) and starting it on the new node.
4661

4662
    """
4663
    instance = self.instance
4664

    
4665
    source_node = instance.primary_node
4666
    target_node = self.target_node
4667

    
4668
    self.LogInfo("Shutting down instance %s on source node %s",
4669
                 instance.name, source_node)
4670

    
4671
    result = self.rpc.call_instance_shutdown(source_node, instance,
4672
                                             self.shutdown_timeout)
4673
    msg = result.fail_msg
4674
    if msg:
4675
      if self.op.ignore_consistency:
4676
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4677
                             " Proceeding anyway. Please make sure node"
4678
                             " %s is down. Error details: %s",
4679
                             instance.name, source_node, source_node, msg)
4680
      else:
4681
        raise errors.OpExecError("Could not shutdown instance %s on"
4682
                                 " node %s: %s" %
4683
                                 (instance.name, source_node, msg))
4684

    
4685
    # create the target disks
4686
    try:
4687
      _CreateDisks(self, instance, target_node=target_node)
4688
    except errors.OpExecError:
4689
      self.LogWarning("Device creation failed, reverting...")
4690
      try:
4691
        _RemoveDisks(self, instance, target_node=target_node)
4692
      finally:
4693
        self.cfg.ReleaseDRBDMinors(instance.name)
4694
        raise
4695

    
4696
    cluster_name = self.cfg.GetClusterInfo().cluster_name
4697

    
4698
    errs = []
4699
    # activate, get path, copy the data over
4700
    for idx, disk in enumerate(instance.disks):
4701
      self.LogInfo("Copying data for disk %d", idx)
4702
      result = self.rpc.call_blockdev_assemble(target_node, disk,
4703
                                               instance.name, True)
4704
      if result.fail_msg:
4705
        self.LogWarning("Can't assemble newly created disk %d: %s",
4706
                        idx, result.fail_msg)
4707
        errs.append(result.fail_msg)
4708
        break
4709
      dev_path = result.payload
4710
      result = self.rpc.call_blockdev_export(source_node, disk,
4711
                                             target_node, dev_path,
4712
                                             cluster_name)
4713
      if result.fail_msg:
4714
        self.LogWarning("Can't copy data over for disk %d: %s",
4715
                        idx, result.fail_msg)
4716
        errs.append(result.fail_msg)
4717
        break
4718

    
4719
    if errs:
4720
      self.LogWarning("Some disks failed to copy, aborting")
4721
      try:
4722
        _RemoveDisks(self, instance, target_node=target_node)
4723
      finally:
4724
        self.cfg.ReleaseDRBDMinors(instance.name)
4725
        raise errors.OpExecError("Errors during disk copy: %s" %
4726
                                 (",".join(errs),))
4727

    
4728
    instance.primary_node = target_node
4729
    self.cfg.Update(instance, feedback_fn)
4730

    
4731
    self.LogInfo("Removing the disks on the original node")
4732
    _RemoveDisks(self, instance, target_node=source_node)
4733

    
4734
    # Only start the instance if it's marked as up
4735
    if instance.admin_up:
4736
      self.LogInfo("Starting instance %s on node %s",
4737
                   instance.name, target_node)
4738

    
4739
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4740
                                           ignore_secondaries=True)
4741
      if not disks_ok:
4742
        _ShutdownInstanceDisks(self, instance)
4743
        raise errors.OpExecError("Can't activate the instance's disks")
4744

    
4745
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4746
      msg = result.fail_msg
4747
      if msg:
4748
        _ShutdownInstanceDisks(self, instance)
4749
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4750
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
4754
  """Migrate all instances from a node.
4755

4756
  """
4757
  HPATH = "node-migrate"
4758
  HTYPE = constants.HTYPE_NODE
4759
  _OP_REQP = ["node_name", "live"]
4760
  REQ_BGL = False
4761

    
4762
  def ExpandNames(self):
4763
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
4764
    if self.op.node_name is None:
4765
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
4766

    
4767
    self.needed_locks = {
4768
      locking.LEVEL_NODE: [self.op.node_name],
4769
      }
4770

    
4771
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4772

    
4773
    # Create tasklets for migrating instances for all instances on this node
4774
    names = []
4775
    tasklets = []
4776

    
4777
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
4778
      logging.debug("Migrating instance %s", inst.name)
4779
      names.append(inst.name)
4780

    
4781
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
4782

    
4783
    self.tasklets = tasklets
4784

    
4785
    # Declare instance locks
4786
    self.needed_locks[locking.LEVEL_INSTANCE] = names
4787

    
4788
  def DeclareLocks(self, level):
4789
    if level == locking.LEVEL_NODE:
4790
      self._LockInstancesNodes()
4791

    
4792
  def BuildHooksEnv(self):
4793
    """Build hooks env.
4794

4795
    This runs on the master, the primary and all the secondaries.
4796

4797
    """
4798
    env = {
4799
      "NODE_NAME": self.op.node_name,
4800
      }
4801

    
4802
    nl = [self.cfg.GetMasterNode()]
4803

    
4804
    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
4808
  def __init__(self, lu, instance_name, live, cleanup):
4809
    """Initializes this class.
4810

4811
    """
4812
    Tasklet.__init__(self, lu)
4813

    
4814
    # Parameters
4815
    self.instance_name = instance_name
4816
    self.live = live
4817
    self.cleanup = cleanup
4818

    
4819
  def CheckPrereq(self):
4820
    """Check prerequisites.
4821

4822
    This checks that the instance is in the cluster.
4823

4824
    """
4825
    instance = self.cfg.GetInstanceInfo(
4826
      self.cfg.ExpandInstanceName(self.instance_name))
4827
    if instance is None:
4828
      raise errors.OpPrereqError("Instance '%s' not known" %
4829
                                 self.instance_name)
4830

    
4831
    if instance.disk_template != constants.DT_DRBD8:
4832
      raise errors.OpPrereqError("Instance's disk layout is not"
4833
                                 " drbd8, cannot migrate.")
4834

    
4835
    secondary_nodes = instance.secondary_nodes
4836
    if not secondary_nodes:
4837
      raise errors.ConfigurationError("No secondary node but using"
4838
                                      " drbd8 disk template")
4839

    
4840
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
4841

    
4842
    target_node = secondary_nodes[0]
4843
    # check memory requirements on the secondary node
4844
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4845
                         instance.name, i_be[constants.BE_MEMORY],
4846
                         instance.hypervisor)
4847

    
4848
    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4850

    
4851
    if not self.cleanup:
4852
      _CheckNodeNotDrained(self, target_node)
4853
      result = self.rpc.call_instance_migratable(instance.primary_node,
4854
                                                 instance)
4855
      result.Raise("Can't migrate, please use failover", prereq=True)
4856

    
4857
    self.instance = instance
4858

    
4859
  def _WaitUntilSync(self):
4860
    """Poll with custom rpc for disk sync.
4861

4862
    This uses our own step-based rpc call.
4863

4864
    """
4865
    self.feedback_fn("* wait until resync is done")
4866
    all_done = False
4867
    while not all_done:
4868
      all_done = True
4869
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4870
                                            self.nodes_ip,
4871
                                            self.instance.disks)
4872
      min_percent = 100
4873
      for node, nres in result.items():
4874
        nres.Raise("Cannot resync disks on node %s" % node)
4875
        node_done, node_percent = nres.payload
4876
        all_done = all_done and node_done
4877
        if node_percent is not None:
4878
          min_percent = min(min_percent, node_percent)
4879
      if not all_done:
4880
        if min_percent < 100:
4881
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
4882
        time.sleep(2)
4883

    
4884
  def _EnsureSecondary(self, node):
4885
    """Demote a node to secondary.
4886

4887
    """
4888
    self.feedback_fn("* switching node %s to secondary mode" % node)
4889

    
4890
    for dev in self.instance.disks:
4891
      self.cfg.SetDiskID(dev, node)
4892

    
4893
    result = self.rpc.call_blockdev_close(node, self.instance.name,
4894
                                          self.instance.disks)
4895
    result.Raise("Cannot change disk to secondary on node %s" % node)
4896

    
4897
  def _GoStandalone(self):
4898
    """Disconnect from the network.
4899

4900
    """
4901
    self.feedback_fn("* changing into standalone mode")
4902
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4903
                                               self.instance.disks)
4904
    for node, nres in result.items():
4905
      nres.Raise("Cannot disconnect disks node %s" % node)
4906

    
4907
  def _GoReconnect(self, multimaster):
4908
    """Reconnect to the network.
4909

4910
    """
4911
    if multimaster:
4912
      msg = "dual-master"
4913
    else:
4914
      msg = "single-master"
4915
    self.feedback_fn("* changing disks into %s mode" % msg)
4916
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4917
                                           self.instance.disks,
4918
                                           self.instance.name, multimaster)
4919
    for node, nres in result.items():
4920
      nres.Raise("Cannot change disks config on node %s" % node)
4921

    
4922
  def _ExecCleanup(self):
4923
    """Try to cleanup after a failed migration.
4924

4925
    The cleanup is done by:
4926
      - check that the instance is running only on one node
4927
        (and update the config if needed)
4928
      - change disks on its secondary node to secondary
4929
      - wait until disks are fully synchronized
4930
      - disconnect from the network
4931
      - change disks into single-master mode
4932
      - wait again until disks are fully synchronized
4933

4934
    """
4935
    instance = self.instance
4936
    target_node = self.target_node
4937
    source_node = self.source_node
4938

    
4939
    # check running on only one node
4940
    self.feedback_fn("* checking where the instance actually runs"
4941
                     " (if this hangs, the hypervisor might be in"
4942
                     " a bad state)")
4943
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4944
    for node, result in ins_l.items():
4945
      result.Raise("Can't contact node %s" % node)
4946

    
4947
    runningon_source = instance.name in ins_l[source_node].payload
4948
    runningon_target = instance.name in ins_l[target_node].payload
4949

    
4950
    if runningon_source and runningon_target:
4951
      raise errors.OpExecError("Instance seems to be running on two nodes,"
4952
                               " or the hypervisor is confused. You will have"
4953
                               " to ensure manually that it runs only on one"
4954
                               " and restart this operation.")
4955

    
4956
    if not (runningon_source or runningon_target):
4957
      raise errors.OpExecError("Instance does not seem to be running at all."
4958
                               " In this case, it's safer to repair by"
4959
                               " running 'gnt-instance stop' to ensure disk"
4960
                               " shutdown, and then restarting it.")
4961

    
4962
    if runningon_target:
4963
      # the migration has actually succeeded, we need to update the config
4964
      self.feedback_fn("* instance running on secondary node (%s),"
4965
                       " updating config" % target_node)
4966
      instance.primary_node = target_node
4967
      self.cfg.Update(instance, self.feedback_fn)
4968
      demoted_node = source_node
4969
    else:
4970
      self.feedback_fn("* instance confirmed to be running on its"
4971
                       " primary node (%s)" % source_node)
4972
      demoted_node = target_node
4973

    
4974
    self._EnsureSecondary(demoted_node)
4975
    try:
4976
      self._WaitUntilSync()
4977
    except errors.OpExecError:
4978
      # we ignore here errors, since if the device is standalone, it
4979
      # won't be able to sync
4980
      pass
4981
    self._GoStandalone()
4982
    self._GoReconnect(False)
4983
    self._WaitUntilSync()
4984

    
4985
    self.feedback_fn("* done")
4986

    
4987
  def _RevertDiskStatus(self):
4988
    """Try to revert the disk status after a failed migration.
4989

4990
    """
4991
    target_node = self.target_node
4992
    try:
4993
      self._EnsureSecondary(target_node)
4994
      self._GoStandalone()
4995
      self._GoReconnect(False)
4996
      self._WaitUntilSync()
4997
    except errors.OpExecError, err:
4998
      self.lu.LogWarning("Migration failed and I can't reconnect the"
4999
                         " drives: error '%s'\n"
5000
                         "Please look and recover the instance status" %
5001
                         str(err))
5002

    
5003
  def _AbortMigration(self):
5004
    """Call the hypervisor code to abort a started migration.
5005

5006
    """
5007
    instance = self.instance
5008
    target_node = self.target_node
5009
    migration_info = self.migration_info
5010

    
5011
    abort_result = self.rpc.call_finalize_migration(target_node,
5012
                                                    instance,
5013
                                                    migration_info,
5014
                                                    False)
5015
    abort_msg = abort_result.fail_msg
5016
    if abort_msg:
5017
      logging.error("Aborting migration failed on target node %s: %s" %
5018
                    (target_node, abort_msg))
5019
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
5021

    
5022
  def _ExecMigration(self):
5023
    """Migrate an instance.
5024

5025
    The migrate is done by:
5026
      - change the disks into dual-master mode
5027
      - wait until disks are fully synchronized again
5028
      - migrate the instance
5029
      - change disks on the new secondary node (the old primary) to secondary
5030
      - wait until disks are fully synchronized
5031
      - change disks into single-master mode
5032

5033
    """
5034
    instance = self.instance
5035
    target_node = self.target_node
5036
    source_node = self.source_node
5037

    
5038
    self.feedback_fn("* checking disk consistency between source and target")
5039
    for dev in instance.disks:
5040
      if not _CheckDiskConsistency(self, dev, target_node, False):
5041
        raise errors.OpExecError("Disk %s is degraded or not fully"
5042
                                 " synchronized on target node,"
5043
                                 " aborting migrate." % dev.iv_name)
5044

    
5045
    # First get the migration information from the remote node
5046
    result = self.rpc.call_migration_info(source_node, instance)
5047
    msg = result.fail_msg
5048
    if msg:
5049
      log_err = ("Failed fetching source migration information from %s: %s" %
5050
                 (source_node, msg))
5051
      logging.error(log_err)
5052
      raise errors.OpExecError(log_err)
5053

    
5054
    self.migration_info = migration_info = result.payload
5055

    
5056
    # Then switch the disks to master/master mode
5057
    self._EnsureSecondary(target_node)
5058
    self._GoStandalone()
5059
    self._GoReconnect(True)
5060
    self._WaitUntilSync()
5061

    
5062
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5063
    result = self.rpc.call_accept_instance(target_node,
5064
                                           instance,
5065
                                           migration_info,
5066
                                           self.nodes_ip[target_node])
5067

    
5068
    msg = result.fail_msg
5069
    if msg:
5070
      logging.error("Instance pre-migration failed, trying to revert"
5071
                    " disk status: %s", msg)
5072
      self._AbortMigration()
5073
      self._RevertDiskStatus()
5074
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5075
                               (instance.name, msg))
5076

    
5077
    self.feedback_fn("* migrating instance to %s" % target_node)
5078
    time.sleep(10)
5079
    result = self.rpc.call_instance_migrate(source_node, instance,
5080
                                            self.nodes_ip[target_node],
5081
                                            self.live)
5082
    msg = result.fail_msg
5083
    if msg:
5084
      logging.error("Instance migration failed, trying to revert"
5085
                    " disk status: %s", msg)
5086
      self._AbortMigration()
5087
      self._RevertDiskStatus()
5088
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5089
                               (instance.name, msg))
5090
    time.sleep(10)
5091

    
5092
    instance.primary_node = target_node
5093
    # distribute new instance config to the other nodes
5094
    self.cfg.Update(instance, self.feedback_fn)
5095

    
5096
    result = self.rpc.call_finalize_migration(target_node,
5097
                                              instance,
5098
                                              migration_info,
5099
                                              True)
5100
    msg = result.fail_msg
5101
    if msg:
5102
      logging.error("Instance migration succeeded, but finalization failed:"
5103
                    " %s" % msg)
5104
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5105
                               msg)
5106

    
5107
    self._EnsureSecondary(source_node)
5108
    self._WaitUntilSync()
5109
    self._GoStandalone()
5110
    self._GoReconnect(False)
5111
    self._WaitUntilSync()
5112

    
5113
    self.feedback_fn("* done")
5114

    
5115
  def Exec(self, feedback_fn):
5116
    """Perform the migration.
5117

5118
    """
5119
    feedback_fn("Migrating instance %s" % self.instance.name)
5120

    
5121
    self.feedback_fn = feedback_fn
5122

    
5123
    self.source_node = self.instance.primary_node
5124
    self.target_node = self.instance.secondary_nodes[0]
5125
    self.all_nodes = [self.source_node, self.target_node]
5126
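    # use the secondary (replication) IPs of both nodes for the DRBD and
    # migration RPCs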
    self.nodes_ip = {
5127
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5128
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5129
      }
5130

    
5131
    if self.cleanup:
5132
      return self._ExecCleanup()
5133
    else:
5134
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
5138
                    info, force_open):
5139
  """Create a tree of block devices on a given node.
5140

5141
  If this device type has to be created on secondaries, create it and
5142
  all its children.
5143

5144
  If not, just recurse to children keeping the same 'force' value.
5145

5146
  @param lu: the lu on whose behalf we execute
5147
  @param node: the node on which to create the device
5148
  @type instance: L{objects.Instance}
5149
  @param instance: the instance which owns the device
5150
  @type device: L{objects.Disk}
5151
  @param device: the device to create
5152
  @type force_create: boolean
5153
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
5156
  @param info: the extra 'metadata' we should attach to the device
5157
      (this will be represented as a LVM tag)
5158
  @type force_open: boolean
5159
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
5163

5164
  """
5165
  if device.CreateOnSecondary():
5166
    force_create = True
5167

    
5168
  if device.children:
5169
    for child in device.children:
5170
      _CreateBlockDev(lu, node, instance, child, force_create,
5171
                      info, force_open)
5172

    
5173
  if not force_create:
5174
    return
5175

    
5176
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5180
  """Create a single block device on a given node.
5181

5182
  This will not recurse over children of the device, so they must be
5183
  created in advance.
5184

5185
  @param lu: the lu on whose behalf we execute
5186
  @param node: the node on which to create the device
5187
  @type instance: L{objects.Instance}
5188
  @param instance: the instance which owns the device
5189
  @type device: L{objects.Disk}
5190
  @param device: the device to create
5191
  @param info: the extra 'metadata' we should attach to the device
5192
      (this will be represented as a LVM tag)
5193
  @type force_open: boolean
5194
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
5198

5199
  """
5200
  lu.cfg.SetDiskID(device, node)
5201
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5202
                                       instance.name, force_open, info)
5203
  result.Raise("Can't create block device %s on"
5204
               " node %s for instance %s" % (device, node, instance.name))
5205
  if device.physical_id is None:
5206
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
5210
  """Generate a suitable LV name.
5211

5212
  This will generate a logical volume name for the given instance.
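  For example, exts of [".disk0", ".disk1"] results in two names, each
  consisting of a cluster-wide unique ID followed by ".disk0" or ".disk1".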
5213

5214
  """
5215
  results = []
5216
  for val in exts:
5217
    new_id = lu.cfg.GenerateUniqueID()
5218
    results.append("%s%s" % (new_id, val))
5219
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5223
                         p_minor, s_minor):
5224
  """Generate a drbd8 device complete with its children.
5225

5226
  """
5227
  port = lu.cfg.AllocatePort()
5228
  vgname = lu.cfg.GetVGName()
5229
  shared_secret = lu.cfg.GenerateDRBDSecret()
5230
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5231
                          logical_id=(vgname, names[0]))
5232
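  # each drbd8 disk needs a small (128 MB) LV holding the DRBD metadata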
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5233
                          logical_id=(vgname, names[1]))
5234
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5235
                          logical_id=(primary, secondary, port,
5236
                                      p_minor, s_minor,
5237
                                      shared_secret),
5238
                          children=[dev_data, dev_meta],
5239
                          iv_name=iv_name)
5240
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
5244
                          instance_name, primary_node,
5245
                          secondary_nodes, disk_info,
5246
                          file_storage_dir, file_driver,
5247
                          base_index):
5248
  """Generate the entire disk layout for a given template type.
5249

5250
  """
5251
  #TODO: compute space requirements
5252

    
5253
  vgname = lu.cfg.GetVGName()
5254
  disk_count = len(disk_info)
5255
  disks = []
5256
  if template_name == constants.DT_DISKLESS:
5257
    pass
5258
  elif template_name == constants.DT_PLAIN:
5259
    if len(secondary_nodes) != 0:
5260
      raise errors.ProgrammerError("Wrong template configuration")
5261

    
5262
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5263
                                      for i in range(disk_count)])
5264
    for idx, disk in enumerate(disk_info):
5265
      disk_index = idx + base_index
5266
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5267
                              logical_id=(vgname, names[idx]),
5268
                              iv_name="disk/%d" % disk_index,
5269
                              mode=disk["mode"])
5270
      disks.append(disk_dev)
5271
  elif template_name == constants.DT_DRBD8:
5272
    if len(secondary_nodes) != 1:
5273
      raise errors.ProgrammerError("Wrong template configuration")
5274
    remote_node = secondary_nodes[0]
5275
    minors = lu.cfg.AllocateDRBDMinor(
5276
      [primary_node, remote_node] * len(disk_info), instance_name)
5277

    
5278
    names = []
5279
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5280
                                               for i in range(disk_count)]):
5281
      names.append(lv_prefix + "_data")
5282
      names.append(lv_prefix + "_meta")
5283
    for idx, disk in enumerate(disk_info):
5284
      disk_index = idx + base_index
5285
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5286
                                      disk["size"], names[idx*2:idx*2+2],
5287
                                      "disk/%d" % disk_index,
5288
                                      minors[idx*2], minors[idx*2+1])
5289
      disk_dev.mode = disk["mode"]
5290
      disks.append(disk_dev)
5291
  elif template_name == constants.DT_FILE:
5292
    if len(secondary_nodes) != 0:
5293
      raise errors.ProgrammerError("Wrong template configuration")
5294

    
5295
    for idx, disk in enumerate(disk_info):
5296
      disk_index = idx + base_index
5297
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5298
                              iv_name="disk/%d" % disk_index,
5299
                              logical_id=(file_driver,
5300
                                          "%s/disk%d" % (file_storage_dir,
5301
                                                         disk_index)),
5302
                              mode=disk["mode"])
5303
      disks.append(disk_dev)
5304
  else:
5305
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5306
  return disks


def _GetInstanceInfoText(instance):
5310
  """Compute that text that should be added to the disk's metadata.
5311
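  For an instance named "inst1.example.com" this returns the string
  "originstname+inst1.example.com".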

5312
  """
5313
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5317
  """Create all disks for an instance.
5318

5319
  This abstracts away some work from AddInstance.
5320

5321
  @type lu: L{LogicalUnit}
5322
  @param lu: the logical unit on whose behalf we execute
5323
  @type instance: L{objects.Instance}
5324
  @param instance: the instance whose disks we should create
5325
  @type to_skip: list
5326
  @param to_skip: list of indices to skip
5327
  @type target_node: string
5328
  @param target_node: if passed, overrides the target node for creation
5329
  @rtype: boolean
5330
  @return: the success of the creation
5331

5332
  """
5333
  info = _GetInstanceInfoText(instance)
5334
  if target_node is None:
5335
    pnode = instance.primary_node
5336
    all_nodes = instance.all_nodes
5337
  else:
5338
    pnode = target_node
5339
    all_nodes = [pnode]
5340

    
5341
  if instance.disk_template == constants.DT_FILE:
5342
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5343
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5344

    
5345
    result.Raise("Failed to create directory '%s' on"
5346
                 " node %s" % (file_storage_dir, pnode))
5347

    
5348
  # Note: this needs to be kept in sync with adding of disks in
5349
  # LUSetInstanceParams
5350
  for idx, device in enumerate(instance.disks):
5351
    if to_skip and idx in to_skip:
5352
      continue
5353
    logging.info("Creating volume %s for instance %s",
5354
                 device.iv_name, instance.name)
5355
    #HARDCODE
5356
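    # force creation (and opening) only on the primary node; secondary
    # nodes get the device only if its type requires it (CreateOnSecondary)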
    for node in all_nodes:
5357
      f_create = node == pnode
5358
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
5362
  """Remove all disks for an instance.
5363

5364
  This abstracts away some work from `AddInstance()` and
5365
  `RemoveInstance()`. Note that in case some of the devices couldn't
5366
  be removed, the removal will continue with the other ones (compare
5367
  with `_CreateDisks()`).
5368

5369
  @type lu: L{LogicalUnit}
5370
  @param lu: the logical unit on whose behalf we execute
5371
  @type instance: L{objects.Instance}
5372
  @param instance: the instance whose disks we should remove
5373
  @type target_node: string
5374
  @param target_node: used to override the node on which to remove the disks
5375
  @rtype: boolean
5376
  @return: the success of the removal
5377

5378
  """
5379
  logging.info("Removing block devices for instance %s", instance.name)
5380

    
5381
  all_result = True
5382
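  # remove every component of each disk on all nodes that may hold
  # part of it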
  for device in instance.disks:
5383
    if target_node:
5384
      edata = [(target_node, device)]
5385
    else:
5386
      edata = device.ComputeNodeTree(instance.primary_node)
5387
    for node, disk in edata:
5388
      lu.cfg.SetDiskID(disk, node)
5389
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5390
      if msg:
5391
        lu.LogWarning("Could not remove block device %s on node %s,"
5392
                      " continuing anyway: %s", device.iv_name, node, msg)
5393
        all_result = False
5394

    
5395
  if instance.disk_template == constants.DT_FILE:
5396
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5397
    if target_node:
5398
      tgt = target_node
5399
    else:
5400
      tgt = instance.primary_node
5401
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5402
    if result.fail_msg:
5403
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5404
                    file_storage_dir, instance.primary_node, result.fail_msg)
5405
      all_result = False
5406

    
5407
  return all_result


def _ComputeDiskSize(disk_template, disks):
5411
  """Compute disk size requirements in the volume group
5412
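  For example, a drbd8 instance with two disks of 1024 and 2048 MB needs
  (1024 + 128) + (2048 + 128) = 3328 MB of free space in the volume group.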

5413
  """
5414
  # Required free disk space as a function of disk and swap space
5415
  req_size_dict = {
5416
    constants.DT_DISKLESS: None,
5417
    constants.DT_PLAIN: sum(d["size"] for d in disks),
5418
    # 128 MB are added for drbd metadata for each disk
5419
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5420
    constants.DT_FILE: None,
5421
  }
5422

    
5423
  if disk_template not in req_size_dict:
5424
    raise errors.ProgrammerError("Disk template '%s' size requirement"
5425
                                 " is unknown" %  disk_template)
5426

    
5427
  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
5431
  """Hypervisor parameter validation.
5432

5433
  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.
5435

5436
  @type lu: L{LogicalUnit}
5437
  @param lu: the logical unit for which we check
5438
  @type nodenames: list
5439
  @param nodenames: the list of nodes on which we should check
5440
  @type hvname: string
5441
  @param hvname: the name of the hypervisor we should use
5442
  @type hvparams: dict
5443
  @param hvparams: the parameters which we need to check
5444
  @raise errors.OpPrereqError: if the parameters are not valid
5445

5446
  """
5447
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5448
                                                  hvname,
5449
                                                  hvparams)
5450
  for node in nodenames:
5451
    info = hvinfo[node]
5452
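    # skip offline nodes, which could not be contacted for validation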
    if info.offline:
5453
      continue
5454
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
5458
  """Create an instance.
5459

5460
  """
5461
  HPATH = "instance-add"
5462
  HTYPE = constants.HTYPE_INSTANCE
5463
  _OP_REQP = ["instance_name", "disks", "disk_template",
5464
              "mode", "start",
5465
              "wait_for_sync", "ip_check", "nics",
5466
              "hvparams", "beparams"]
5467
  REQ_BGL = False
5468

    
5469
  def _ExpandNode(self, node):
5470
    """Expands and checks one node name.
5471

5472
    """
5473
    node_full = self.cfg.ExpandNodeName(node)
5474
    if node_full is None:
5475
      raise errors.OpPrereqError("Unknown node %s" % node)
5476
    return node_full
5477

    
5478
  def ExpandNames(self):
5479
    """ExpandNames for CreateInstance.
5480

5481
    Figure out the right locks for instance creation.
5482

5483
    """
5484
    self.needed_locks = {}
5485

    
5486
    # set optional parameters to none if they don't exist
5487
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5488
      if not hasattr(self.op, attr):
5489
        setattr(self.op, attr, None)
5490

    
5491
    # cheap checks, mostly valid constants given
5492

    
5493
    # verify creation mode
5494
    if self.op.mode not in (constants.INSTANCE_CREATE,
5495
                            constants.INSTANCE_IMPORT):
5496
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5497
                                 self.op.mode)
5498

    
5499
    # disk template and mirror node verification
5500
    if self.op.disk_template not in constants.DISK_TEMPLATES:
5501
      raise errors.OpPrereqError("Invalid disk template name")
5502

    
5503
    if self.op.hypervisor is None:
5504
      self.op.hypervisor = self.cfg.GetHypervisorType()
5505

    
5506
    cluster = self.cfg.GetClusterInfo()
5507
    enabled_hvs = cluster.enabled_hypervisors
5508
    if self.op.hypervisor not in enabled_hvs:
5509
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5510
                                 " cluster (%s)" % (self.op.hypervisor,
5511
                                  ",".join(enabled_hvs)))
5512

    
5513
    # check hypervisor parameter syntax (locally)
5514
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5515
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5516
                                  self.op.hvparams)
5517
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5518
    hv_type.CheckParameterSyntax(filled_hvp)
5519
    self.hv_full = filled_hvp
5520

    
5521
    # fill and remember the beparams dict
5522
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5523
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5524
                                    self.op.beparams)
5525

    
5526
    #### instance parameters check
5527

    
5528
    # instance name verification
5529
    hostname1 = utils.HostInfo(self.op.instance_name)
5530
    self.op.instance_name = instance_name = hostname1.name
5531

    
5532
    # this is just a preventive check, but someone might still add this
5533
    # instance in the meantime, and creation will fail at lock-add time
5534
    if instance_name in self.cfg.GetInstanceList():
5535
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5536
                                 instance_name)
5537

    
5538
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5539

    
5540
    # NIC buildup
5541
    self.nics = []
5542
    for idx, nic in enumerate(self.op.nics):
5543
      nic_mode_req = nic.get("mode", None)
5544
      nic_mode = nic_mode_req
5545
      if nic_mode is None:
5546
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5547

    
5548
      # in routed mode, for the first nic, the default ip is 'auto'
5549
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5550
        default_ip_mode = constants.VALUE_AUTO
5551
      else:
5552
        default_ip_mode = constants.VALUE_NONE
5553

    
5554
      # ip validity checks
5555
      ip = nic.get("ip", default_ip_mode)
5556
      if ip is None or ip.lower() == constants.VALUE_NONE:
5557
        nic_ip = None
5558
      elif ip.lower() == constants.VALUE_AUTO:
5559
        nic_ip = hostname1.ip
5560
      else:
5561
        if not utils.IsValidIP(ip):
5562
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5563
                                     " like a valid IP" % ip)
5564
        nic_ip = ip
5565

    
5566
      # TODO: check the ip for uniqueness !!
5567
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5568
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
5569

    
5570
      # MAC address verification
5571
      mac = nic.get("mac", constants.VALUE_AUTO)
5572
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5573
        if not utils.IsValidMac(mac.lower()):
5574
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5575
                                     mac)
5576
        else:
5577
          # or validate/reserve the current one
5578
          if self.cfg.IsMacInUse(mac):
5579
            raise errors.OpPrereqError("MAC address %s already in use"
5580
                                       " in cluster" % mac)
5581

    
5582
      # bridge verification
5583
      bridge = nic.get("bridge", None)
5584
      link = nic.get("link", None)
5585
      if bridge and link:
5586
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5587
                                   " at the same time")
5588
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5589
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
5590
      elif bridge:
5591
        link = bridge
5592

    
5593
      nicparams = {}
5594
      if nic_mode_req:
5595
        nicparams[constants.NIC_MODE] = nic_mode_req
5596
      if link:
5597
        nicparams[constants.NIC_LINK] = link
5598

    
5599
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5600
                                      nicparams)
5601
      objects.NIC.CheckParameterSyntax(check_params)
5602
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5603

    
5604
    # disk checks/pre-build
5605
    self.disks = []
5606
    for disk in self.op.disks:
5607
      mode = disk.get("mode", constants.DISK_RDWR)
5608
      if mode not in constants.DISK_ACCESS_SET:
5609
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5610
                                   mode)
5611
      size = disk.get("size", None)
5612
      if size is None:
5613
        raise errors.OpPrereqError("Missing disk size")
5614
      try:
5615
        size = int(size)
5616
      except ValueError:
5617
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
5618
      self.disks.append({"size": size, "mode": mode})
5619

    
5620
    # used in CheckPrereq for ip ping check
5621
    self.check_ip = hostname1.ip
5622

    
5623
    # file storage checks
5624
    if (self.op.file_driver and
5625
        self.op.file_driver not in constants.FILE_DRIVER):
5626
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
5627
                                 self.op.file_driver)
5628

    
5629
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must not"
                                 " be absolute")
5631

    
5632
    ### Node/iallocator related checks
5633
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
5634
      raise errors.OpPrereqError("One and only one of iallocator and primary"
5635
                                 " node must be given")
5636

    
5637
    if self.op.iallocator:
5638
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5639
    else:
5640
      self.op.pnode = self._ExpandNode(self.op.pnode)
5641
      nodelist = [self.op.pnode]
5642
      if self.op.snode is not None:
5643
        self.op.snode = self._ExpandNode(self.op.snode)
5644
        nodelist.append(self.op.snode)
5645
      self.needed_locks[locking.LEVEL_NODE] = nodelist
5646

    
5647
    # in case of import lock the source node too
5648
    if self.op.mode == constants.INSTANCE_IMPORT:
5649
      src_node = getattr(self.op, "src_node", None)
5650
      src_path = getattr(self.op, "src_path", None)
5651

    
5652
      if src_path is None:
5653
        self.op.src_path = src_path = self.op.instance_name
5654

    
5655
      if src_node is None:
5656
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5657
        self.op.src_node = None
5658
        if os.path.isabs(src_path):
5659
          raise errors.OpPrereqError("Importing an instance from an absolute"
5660
                                     " path requires a source node option.")
5661
      else:
5662
        self.op.src_node = src_node = self._ExpandNode(src_node)
5663
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5664
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
5665
        if not os.path.isabs(src_path):
5666
          self.op.src_path = src_path = \
5667
            os.path.join(constants.EXPORT_DIR, src_path)
5668

    
5669
      # On import force_variant must be True, because if we forced it at
5670
      # initial install, our only chance when importing it back is that it
5671
      # works again!
5672
      self.op.force_variant = True
5673

    
5674
    else: # INSTANCE_CREATE
5675
      if getattr(self.op, "os_type", None) is None:
5676
        raise errors.OpPrereqError("No guest OS specified")
5677
      self.op.force_variant = getattr(self.op, "force_variant", False)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]
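    # For illustration: for a DRBD-based disk template the allocator is
    # expected to return two node names, e.g. ["node1.example.com",
    # "node2.example.com"] (hypothetical names), which become the primary and
    # secondary node above.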

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl
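    # For illustration: in import mode the hooks additionally see SRC_NODE,
    # SRC_PATH and SRC_IMAGES as set above; nl is the list of nodes the hooks
    # run on (master, primary and any secondaries).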


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                     src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))
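    # For illustration: utils.TcpPing returning True (i.e. something answered
    # on the node daemon port at check_ip) is treated as an address conflict,
    # so even an unrelated host holding that IP aborts the creation here.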

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)
    if not self.op.force_variant:
      _CheckOSVariant(result.payload, self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
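    # For illustration: in dry-run mode the caller gets back only the chosen
    # node names, e.g. ["node1.example.com", "node2.example.com"]
    # (hypothetical names).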

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))
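    # For illustration: with a cluster file storage dir of, say,
    # /srv/ganeti/file-storage and no per-instance override this yields
    # /srv/ganeti/file-storage/<instance name> (hypothetical path).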


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
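    # For illustration: the returned value is an ssh command line targeting
    # the primary node, which in turn runs the hypervisor-specific console
    # command (for Xen, something along the lines of "xm console <instance>").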


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node")

    
6327
  @staticmethod
6328
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6329
    """Compute a new secondary node using an IAllocator.
6330

6331
    """
6332
    ial = IAllocator(lu.cfg, lu.rpc,
6333
                     mode=constants.IALLOCATOR_MODE_RELOC,
6334
                     name=instance_name,
6335
                     relocate_from=relocate_from)
6336

    
6337
    ial.Run(iallocator_name)
6338

    
6339
    if not ial.success:
6340
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6341
                                 " %s" % (iallocator_name, ial.info))
6342

    
6343
    if len(ial.nodes) != ial.required_nodes:
6344
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6345
                                 " of nodes (%s), required %s" %
6346
                                 (len(ial.nodes), ial.required_nodes))
6347

    
6348
    remote_node_name = ial.nodes[0]
6349

    
6350
    lu.LogInfo("Selected new secondary for instance '%s': %s",
6351
               instance_name, remote_node_name)
6352

    
6353
    return remote_node_name
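    # For illustration: in relocation mode the allocator is expected to return
    # a single node name, which becomes the new secondary for the instance.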

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes))

    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced")

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (", ".join([str(i) for i in self.disks]), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
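    # For illustration: iv_names maps an instance-visible name such as
    # "disk/0" to (drbd_dev, [old_data_lv, old_meta_lv],
    # [new_data_lv, new_meta_lv]), which the later replace steps rely on.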

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (dev, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)
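      # For illustration: at this point an original LV such as "xyz.disk0_data"
      # has been renamed to "xyz.disk0_data_replaced-<timestamp>" and the newly
      # created LV now carries the original name, so the DRBD device can be
      # re-attached to local storage under unchanged names (example names are
      # hypothetical).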

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r" % (minors,))

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
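      # For illustration: a DRBD8 logical_id is the tuple
      # (node_a, node_b, port, minor_a, minor_b, secret); new_alone_id leaves
      # the port as None so the device first comes up standalone, while
      # new_net_id keeps the original port for the later network attach.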

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = node_name

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                node_name, True):
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
6937

    
6938

    
6939
class LUGrowDisk(LogicalUnit):
6940
  """Grow a disk of an instance.
6941

6942
  """
6943
  HPATH = "disk-grow"
6944
  HTYPE = constants.HTYPE_INSTANCE
6945
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
6946
  REQ_BGL = False
6947

    
6948
  def ExpandNames(self):
6949
    self._ExpandAndLockInstance()
6950
    self.needed_locks[locking.LEVEL_NODE] = []
6951
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6952

    
6953
  def DeclareLocks(self, level):
6954
    if level == locking.LEVEL_NODE:
6955
      self._LockInstancesNodes()
6956

    
6957
  def BuildHooksEnv(self):
6958
    """Build hooks env.
6959

6960
    This runs on the master, the primary and all the secondaries.
6961

6962
    """
6963
    env = {
6964
      "DISK": self.op.disk,
6965
      "AMOUNT": self.op.amount,
6966
      }
6967
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6968
    nl = [
6969
      self.cfg.GetMasterNode(),
6970
      self.instance.primary_node,
6971
      ]
6972
    return env, nl, nl
6973

    
6974
  def CheckPrereq(self):
6975
    """Check prerequisites.
6976

6977
    This checks that the instance is in the cluster.
6978

6979
    """
6980
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6981
    assert instance is not None, \
6982
      "Cannot retrieve locked instance %s" % self.op.instance_name
6983
    nodenames = list(instance.all_nodes)
6984
    for node in nodenames:
6985
      _CheckNodeOnline(self, node)
6986

    
6987

    
6988
    self.instance = instance
6989

    
6990
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
6991
      raise errors.OpPrereqError("Instance's disk layout does not support"
6992
                                 " growing.")
6993

    
6994
    self.disk = instance.FindDisk(self.op.disk)
6995

    
6996
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6997
                                       instance.hypervisor)
6998
    for node in nodenames:
6999
      info = nodeinfo[node]
7000
      info.Raise("Cannot get current information from node %s" % node)
7001
      vg_free = info.payload.get('vg_free', None)
7002
      if not isinstance(vg_free, int):
7003
        raise errors.OpPrereqError("Can't compute free disk space on"
7004
                                   " node %s" % node)
7005
      if self.op.amount > vg_free:
7006
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
7007
                                   " %d MiB available, %d MiB required" %
7008
                                   (node, vg_free, self.op.amount))
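    # Added note (not in the original source): the loop above applies the
    # free-space check to every node in instance.all_nodes, so for DRBD8 the
    # grow is refused unless both the primary and the secondary have room;
    # e.g. a request with amount=2048 fails as soon as any involved node
    # reports vg_free below 2048 MiB.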
7009

    
7010
  def Exec(self, feedback_fn):
7011
    """Execute disk grow.
7012

7013
    """
7014
    instance = self.instance
7015
    disk = self.disk
7016
    for node in instance.all_nodes:
7017
      self.cfg.SetDiskID(disk, node)
7018
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7019
      result.Raise("Grow request failed to node %s" % node)
7020
    disk.RecordGrow(self.op.amount)
7021
    self.cfg.Update(instance, feedback_fn)
7022
    if self.op.wait_for_sync:
7023
      disk_abort = not _WaitForSync(self, instance)
7024
      if disk_abort:
7025
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7026
                             " status.\nPlease check the instance.")
7027

    
7028

    
7029
class LUQueryInstanceData(NoHooksLU):
7030
  """Query runtime instance data.
7031

7032
  """
7033
  _OP_REQP = ["instances", "static"]
7034
  REQ_BGL = False
7035

    
7036
  def ExpandNames(self):
7037
    self.needed_locks = {}
7038
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7039

    
7040
    if not isinstance(self.op.instances, list):
7041
      raise errors.OpPrereqError("Invalid argument type 'instances'")
7042

    
7043
    if self.op.instances:
7044
      self.wanted_names = []
7045
      for name in self.op.instances:
7046
        full_name = self.cfg.ExpandInstanceName(name)
7047
        if full_name is None:
7048
          raise errors.OpPrereqError("Instance '%s' not known" % name)
7049
        self.wanted_names.append(full_name)
7050
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7051
    else:
7052
      self.wanted_names = None
7053
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7054

    
7055
    self.needed_locks[locking.LEVEL_NODE] = []
7056
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7057

    
7058
  def DeclareLocks(self, level):
7059
    if level == locking.LEVEL_NODE:
7060
      self._LockInstancesNodes()
7061

    
7062
  def CheckPrereq(self):
7063
    """Check prerequisites.
7064

7065
    This only checks the optional instance list against the existing names.
7066

7067
    """
7068
    if self.wanted_names is None:
7069
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7070

    
7071
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7072
                             in self.wanted_names]
7073
    return
7074

    
7075
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
7076
    """Returns the status of a block device
7077

7078
    """
7079
    if self.op.static or not node:
7080
      return None
7081

    
7082
    self.cfg.SetDiskID(dev, node)
7083

    
7084
    result = self.rpc.call_blockdev_find(node, dev)
7085
    if result.offline:
7086
      return None
7087

    
7088
    result.Raise("Can't compute disk status for %s" % instance_name)
7089

    
7090
    status = result.payload
7091
    if status is None:
7092
      return None
7093

    
7094
    return (status.dev_path, status.major, status.minor,
7095
            status.sync_percent, status.estimated_time,
7096
            status.is_degraded, status.ldisk_status)
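  # Added note for _ComputeBlockdevStatus: the tuple above is what shows up
  # as "pstatus"/"sstatus" in _ComputeDiskStatus below; an illustrative value
  # for a DRBD device that is still resyncing could be
  #   ("/dev/drbd0", 147, 0, 87.5, 42, True, constants.LDS_OKAY)
  # i.e. (device path, major, minor, sync percent, ETA, degraded flag, local
  # disk status).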
7097

    
7098
  def _ComputeDiskStatus(self, instance, snode, dev):
7099
    """Compute block device status.
7100

7101
    """
7102
    if dev.dev_type in constants.LDS_DRBD:
7103
      # we change the snode then (otherwise we use the one passed in)
7104
      if dev.logical_id[0] == instance.primary_node:
7105
        snode = dev.logical_id[1]
7106
      else:
7107
        snode = dev.logical_id[0]
7108

    
7109
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7110
                                              instance.name, dev)
7111
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7112

    
7113
    if dev.children:
7114
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
7115
                      for child in dev.children]
7116
    else:
7117
      dev_children = []
7118

    
7119
    data = {
7120
      "iv_name": dev.iv_name,
7121
      "dev_type": dev.dev_type,
7122
      "logical_id": dev.logical_id,
7123
      "physical_id": dev.physical_id,
7124
      "pstatus": dev_pstatus,
7125
      "sstatus": dev_sstatus,
7126
      "children": dev_children,
7127
      "mode": dev.mode,
7128
      "size": dev.size,
7129
      }
7130

    
7131
    return data
7132

    
7133
  def Exec(self, feedback_fn):
7134
    """Gather and return data"""
7135
    result = {}
7136

    
7137
    cluster = self.cfg.GetClusterInfo()
7138

    
7139
    for instance in self.wanted_instances:
7140
      if not self.op.static:
7141
        remote_info = self.rpc.call_instance_info(instance.primary_node,
7142
                                                  instance.name,
7143
                                                  instance.hypervisor)
7144
        remote_info.Raise("Error checking node %s" % instance.primary_node)
7145
        remote_info = remote_info.payload
7146
        if remote_info and "state" in remote_info:
7147
          remote_state = "up"
7148
        else:
7149
          remote_state = "down"
7150
      else:
7151
        remote_state = None
7152
      if instance.admin_up:
7153
        config_state = "up"
7154
      else:
7155
        config_state = "down"
7156

    
7157
      disks = [self._ComputeDiskStatus(instance, None, device)
7158
               for device in instance.disks]
7159

    
7160
      idict = {
7161
        "name": instance.name,
7162
        "config_state": config_state,
7163
        "run_state": remote_state,
7164
        "pnode": instance.primary_node,
7165
        "snodes": instance.secondary_nodes,
7166
        "os": instance.os,
7167
        # this happens to be the same format used for hooks
7168
        "nics": _NICListToTuple(self, instance.nics),
7169
        "disks": disks,
7170
        "hypervisor": instance.hypervisor,
7171
        "network_port": instance.network_port,
7172
        "hv_instance": instance.hvparams,
7173
        "hv_actual": cluster.FillHV(instance),
7174
        "be_instance": instance.beparams,
7175
        "be_actual": cluster.FillBE(instance),
7176
        "serial_no": instance.serial_no,
7177
        "mtime": instance.mtime,
7178
        "ctime": instance.ctime,
7179
        "uuid": instance.uuid,
7180
        }
7181

    
7182
      result[instance.name] = idict
7183

    
7184
    return result
7185

    
7186

    
7187
class LUSetInstanceParams(LogicalUnit):
7188
  """Modifies an instances's parameters.
7189

7190
  """
7191
  HPATH = "instance-modify"
7192
  HTYPE = constants.HTYPE_INSTANCE
7193
  _OP_REQP = ["instance_name"]
7194
  REQ_BGL = False
7195

    
7196
  def CheckArguments(self):
7197
    if not hasattr(self.op, 'nics'):
7198
      self.op.nics = []
7199
    if not hasattr(self.op, 'disks'):
7200
      self.op.disks = []
7201
    if not hasattr(self.op, 'beparams'):
7202
      self.op.beparams = {}
7203
    if not hasattr(self.op, 'hvparams'):
7204
      self.op.hvparams = {}
7205
    self.op.force = getattr(self.op, "force", False)
7206
    if not (self.op.nics or self.op.disks or
7207
            self.op.hvparams or self.op.beparams):
7208
      raise errors.OpPrereqError("No changes submitted")
7209

    
7210
    # Disk validation
7211
    disk_addremove = 0
7212
    for disk_op, disk_dict in self.op.disks:
7213
      if disk_op == constants.DDM_REMOVE:
7214
        disk_addremove += 1
7215
        continue
7216
      elif disk_op == constants.DDM_ADD:
7217
        disk_addremove += 1
7218
      else:
7219
        if not isinstance(disk_op, int):
7220
          raise errors.OpPrereqError("Invalid disk index")
7221
        if not isinstance(disk_dict, dict):
7222
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7223
          raise errors.OpPrereqError(msg)
7224

    
7225
      if disk_op == constants.DDM_ADD:
7226
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7227
        if mode not in constants.DISK_ACCESS_SET:
7228
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
7229
        size = disk_dict.get('size', None)
7230
        if size is None:
7231
          raise errors.OpPrereqError("Required disk parameter size missing")
7232
        try:
7233
          size = int(size)
7234
        except ValueError, err:
7235
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7236
                                     str(err))
7237
        disk_dict['size'] = size
7238
      else:
7239
        # modification of disk
7240
        if 'size' in disk_dict:
7241
          raise errors.OpPrereqError("Disk size change not possible, use"
7242
                                     " grow-disk")
7243

    
7244
    if disk_addremove > 1:
7245
      raise errors.OpPrereqError("Only one disk add or remove operation"
7246
                                 " supported at a time")
7247

    
7248
    # NIC validation
7249
    nic_addremove = 0
7250
    for nic_op, nic_dict in self.op.nics:
7251
      if nic_op == constants.DDM_REMOVE:
7252
        nic_addremove += 1
7253
        continue
7254
      elif nic_op == constants.DDM_ADD:
7255
        nic_addremove += 1
7256
      else:
7257
        if not isinstance(nic_op, int):
7258
          raise errors.OpPrereqError("Invalid nic index")
7259
        if not isinstance(nic_dict, dict):
7260
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7261
          raise errors.OpPrereqError(msg)
7262

    
7263
      # nic_dict should be a dict
7264
      nic_ip = nic_dict.get('ip', None)
7265
      if nic_ip is not None:
7266
        if nic_ip.lower() == constants.VALUE_NONE:
7267
          nic_dict['ip'] = None
7268
        else:
7269
          if not utils.IsValidIP(nic_ip):
7270
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
7271

    
7272
      nic_bridge = nic_dict.get('bridge', None)
7273
      nic_link = nic_dict.get('link', None)
7274
      if nic_bridge and nic_link:
7275
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7276
                                   " at the same time")
7277
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7278
        nic_dict['bridge'] = None
7279
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7280
        nic_dict['link'] = None
7281

    
7282
      if nic_op == constants.DDM_ADD:
7283
        nic_mac = nic_dict.get('mac', None)
7284
        if nic_mac is None:
7285
          nic_dict['mac'] = constants.VALUE_AUTO
7286

    
7287
      if 'mac' in nic_dict:
7288
        nic_mac = nic_dict['mac']
7289
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7290
          if not utils.IsValidMac(nic_mac):
7291
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
7292
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7293
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7294
                                     " modifying an existing nic")
7295

    
7296
    if nic_addremove > 1:
7297
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7298
                                 " supported at a time")
7299

    
7300
  def ExpandNames(self):
7301
    self._ExpandAndLockInstance()
7302
    self.needed_locks[locking.LEVEL_NODE] = []
7303
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7304

    
7305
  def DeclareLocks(self, level):
7306
    if level == locking.LEVEL_NODE:
7307
      self._LockInstancesNodes()
7308

    
7309
  def BuildHooksEnv(self):
7310
    """Build hooks env.
7311

7312
    This runs on the master, primary and secondaries.
7313

7314
    """
7315
    args = dict()
7316
    if constants.BE_MEMORY in self.be_new:
7317
      args['memory'] = self.be_new[constants.BE_MEMORY]
7318
    if constants.BE_VCPUS in self.be_new:
7319
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
7320
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7321
    # information at all.
7322
    if self.op.nics:
7323
      args['nics'] = []
7324
      nic_override = dict(self.op.nics)
7325
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7326
      for idx, nic in enumerate(self.instance.nics):
7327
        if idx in nic_override:
7328
          this_nic_override = nic_override[idx]
7329
        else:
7330
          this_nic_override = {}
7331
        if 'ip' in this_nic_override:
7332
          ip = this_nic_override['ip']
7333
        else:
7334
          ip = nic.ip
7335
        if 'mac' in this_nic_override:
7336
          mac = this_nic_override['mac']
7337
        else:
7338
          mac = nic.mac
7339
        if idx in self.nic_pnew:
7340
          nicparams = self.nic_pnew[idx]
7341
        else:
7342
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7343
        mode = nicparams[constants.NIC_MODE]
7344
        link = nicparams[constants.NIC_LINK]
7345
        args['nics'].append((ip, mac, mode, link))
7346
      if constants.DDM_ADD in nic_override:
7347
        ip = nic_override[constants.DDM_ADD].get('ip', None)
7348
        mac = nic_override[constants.DDM_ADD]['mac']
7349
        nicparams = self.nic_pnew[constants.DDM_ADD]
7350
        mode = nicparams[constants.NIC_MODE]
7351
        link = nicparams[constants.NIC_LINK]
7352
        args['nics'].append((ip, mac, mode, link))
7353
      elif constants.DDM_REMOVE in nic_override:
7354
        del args['nics'][-1]
7355

    
7356
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7357
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7358
    return env, nl, nl
7359

    
7360
  def _GetUpdatedParams(self, old_params, update_dict,
7361
                        default_values, parameter_types):
7362
    """Return the new params dict for the given params.
7363

7364
    @type old_params: dict
7365
    @param old_params: old parameters
7366
    @type update_dict: dict
7367
    @param update_dict: dict containing new parameter values,
7368
                        or constants.VALUE_DEFAULT to reset the
7369
                        parameter to its default value
7370
    @type default_values: dict
7371
    @param default_values: default values for the filled parameters
7372
    @type parameter_types: dict
7373
    @param parameter_types: dict mapping target dict keys to types
7374
                            in constants.ENFORCEABLE_TYPES
7375
    @rtype: (dict, dict)
7376
    @return: (new_parameters, filled_parameters)
7377

7378
    """
7379
    params_copy = copy.deepcopy(old_params)
7380
    for key, val in update_dict.iteritems():
7381
      if val == constants.VALUE_DEFAULT:
7382
        try:
7383
          del params_copy[key]
7384
        except KeyError:
7385
          pass
7386
      else:
7387
        params_copy[key] = val
7388
    utils.ForceDictType(params_copy, parameter_types)
7389
    params_filled = objects.FillDict(default_values, params_copy)
7390
    return (params_copy, params_filled)
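  # Worked example for _GetUpdatedParams (added note, illustrative values):
  # old_params={"memory": 512},
  # update_dict={"memory": constants.VALUE_DEFAULT, "vcpus": 2},
  # default_values={"memory": 128, "vcpus": 1} yields
  #   ({"vcpus": 2}, {"memory": 128, "vcpus": 2})
  # i.e. VALUE_DEFAULT drops the per-instance override, and the second dict
  # is the fully filled view used for validation.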
7391

    
7392
  def CheckPrereq(self):
7393
    """Check prerequisites.
7394

7395
    This only checks the instance list against the existing names.
7396

7397
    """
7398
    self.force = self.op.force
7399

    
7400
    # checking the new params on the primary/secondary nodes
7401

    
7402
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7403
    cluster = self.cluster = self.cfg.GetClusterInfo()
7404
    assert self.instance is not None, \
7405
      "Cannot retrieve locked instance %s" % self.op.instance_name
7406
    pnode = instance.primary_node
7407
    nodelist = list(instance.all_nodes)
7408

    
7409
    # hvparams processing
7410
    if self.op.hvparams:
7411
      i_hvdict, hv_new = self._GetUpdatedParams(
7412
                             instance.hvparams, self.op.hvparams,
7413
                             cluster.hvparams[instance.hypervisor],
7414
                             constants.HVS_PARAMETER_TYPES)
7415
      # local check
7416
      hypervisor.GetHypervisor(
7417
        instance.hypervisor).CheckParameterSyntax(hv_new)
7418
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7419
      self.hv_new = hv_new # the new actual values
7420
      self.hv_inst = i_hvdict # the new dict (without defaults)
7421
    else:
7422
      self.hv_new = self.hv_inst = {}
7423

    
7424
    # beparams processing
7425
    if self.op.beparams:
7426
      i_bedict, be_new = self._GetUpdatedParams(
7427
                             instance.beparams, self.op.beparams,
7428
                             cluster.beparams[constants.PP_DEFAULT],
7429
                             constants.BES_PARAMETER_TYPES)
7430
      self.be_new = be_new # the new actual values
7431
      self.be_inst = i_bedict # the new dict (without defaults)
7432
    else:
7433
      self.be_new = self.be_inst = {}
7434

    
7435
    self.warn = []
7436

    
7437
    if constants.BE_MEMORY in self.op.beparams and not self.force:
7438
      mem_check_list = [pnode]
7439
      if be_new[constants.BE_AUTO_BALANCE]:
7440
        # either we changed auto_balance to yes or it was from before
7441
        mem_check_list.extend(instance.secondary_nodes)
7442
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
7443
                                                  instance.hypervisor)
7444
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7445
                                         instance.hypervisor)
7446
      pninfo = nodeinfo[pnode]
7447
      msg = pninfo.fail_msg
7448
      if msg:
7449
        # Assume the primary node is unreachable and go ahead
7450
        self.warn.append("Can't get info from primary node %s: %s" %
7451
                         (pnode,  msg))
7452
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
7453
        self.warn.append("Node data from primary node %s doesn't contain"
7454
                         " free memory information" % pnode)
7455
      elif instance_info.fail_msg:
7456
        self.warn.append("Can't get instance runtime information: %s" %
7457
                        instance_info.fail_msg)
7458
      else:
7459
        if instance_info.payload:
7460
          current_mem = int(instance_info.payload['memory'])
7461
        else:
7462
          # Assume instance not running
7463
          # (there is a slight race condition here, but it's not very probable,
7464
          # and we have no other way to check)
7465
          current_mem = 0
7466
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7467
                    pninfo.payload['memory_free'])
7468
        if miss_mem > 0:
7469
          raise errors.OpPrereqError("This change will prevent the instance"
7470
                                     " from starting, due to %d MB of memory"
7471
                                     " missing on its primary node" % miss_mem)
7472

    
7473
      if be_new[constants.BE_AUTO_BALANCE]:
7474
        for node, nres in nodeinfo.items():
7475
          if node not in instance.secondary_nodes:
7476
            continue
7477
          msg = nres.fail_msg
7478
          if msg:
7479
            self.warn.append("Can't get info from secondary node %s: %s" %
7480
                             (node, msg))
7481
          elif not isinstance(nres.payload.get('memory_free', None), int):
7482
            self.warn.append("Secondary node %s didn't return free"
7483
                             " memory information" % node)
7484
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7485
            self.warn.append("Not enough memory to failover instance to"
7486
                             " secondary node %s" % node)
7487

    
7488
    # NIC processing
7489
    self.nic_pnew = {}
7490
    self.nic_pinst = {}
7491
    for nic_op, nic_dict in self.op.nics:
7492
      if nic_op == constants.DDM_REMOVE:
7493
        if not instance.nics:
7494
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
7495
        continue
7496
      if nic_op != constants.DDM_ADD:
7497
        # an existing nic
7498
        if nic_op < 0 or nic_op >= len(instance.nics):
7499
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7500
                                     " are 0 to %d" %
7501
                                     (nic_op, len(instance.nics) - 1))
7502
        old_nic_params = instance.nics[nic_op].nicparams
7503
        old_nic_ip = instance.nics[nic_op].ip
7504
      else:
7505
        old_nic_params = {}
7506
        old_nic_ip = None
7507

    
7508
      update_params_dict = dict([(key, nic_dict[key])
7509
                                 for key in constants.NICS_PARAMETERS
7510
                                 if key in nic_dict])
7511

    
7512
      if 'bridge' in nic_dict:
7513
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7514

    
7515
      new_nic_params, new_filled_nic_params = \
7516
          self._GetUpdatedParams(old_nic_params, update_params_dict,
7517
                                 cluster.nicparams[constants.PP_DEFAULT],
7518
                                 constants.NICS_PARAMETER_TYPES)
7519
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7520
      self.nic_pinst[nic_op] = new_nic_params
7521
      self.nic_pnew[nic_op] = new_filled_nic_params
7522
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7523

    
7524
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
7525
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7526
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7527
        if msg:
7528
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7529
          if self.force:
7530
            self.warn.append(msg)
7531
          else:
7532
            raise errors.OpPrereqError(msg)
7533
      if new_nic_mode == constants.NIC_MODE_ROUTED:
7534
        if 'ip' in nic_dict:
7535
          nic_ip = nic_dict['ip']
7536
        else:
7537
          nic_ip = old_nic_ip
7538
        if nic_ip is None:
7539
          raise errors.OpPrereqError('Cannot set the nic ip to None'
7540
                                     ' on a routed nic')
7541
      if 'mac' in nic_dict:
7542
        nic_mac = nic_dict['mac']
7543
        if nic_mac is None:
7544
          raise errors.OpPrereqError('Cannot set the nic mac to None')
7545
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7546
          # otherwise generate the mac
7547
          nic_dict['mac'] = self.cfg.GenerateMAC()
7548
        else:
7549
          # or validate/reserve the current one
7550
          if self.cfg.IsMacInUse(nic_mac):
7551
            raise errors.OpPrereqError("MAC address %s already in use"
7552
                                       " in cluster" % nic_mac)
7553

    
7554
    # DISK processing
7555
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7556
      raise errors.OpPrereqError("Disk operations not supported for"
7557
                                 " diskless instances")
7558
    for disk_op, disk_dict in self.op.disks:
7559
      if disk_op == constants.DDM_REMOVE:
7560
        if len(instance.disks) == 1:
7561
          raise errors.OpPrereqError("Cannot remove the last disk of"
7562
                                     " an instance")
7563
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7564
        ins_l = ins_l[pnode]
7565
        msg = ins_l.fail_msg
7566
        if msg:
7567
          raise errors.OpPrereqError("Can't contact node %s: %s" %
7568
                                     (pnode, msg))
7569
        if instance.name in ins_l.payload:
7570
          raise errors.OpPrereqError("Instance is running, can't remove"
7571
                                     " disks.")
7572

    
7573
      if (disk_op == constants.DDM_ADD and
7574
          len(instance.disks) >= constants.MAX_DISKS):
7575
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7576
                                   " add more" % constants.MAX_DISKS)
7577
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7578
        # an existing disk
7579
        if disk_op < 0 or disk_op >= len(instance.disks):
7580
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
7581
                                     " are 0 to %d" %
7582
                                     (disk_op, len(instance.disks) - 1))
7583

    
7584
    return
7585

    
7586
  def Exec(self, feedback_fn):
7587
    """Modifies an instance.
7588

7589
    All parameters take effect only at the next restart of the instance.
7590

7591
    """
7592
    # Process here the warnings from CheckPrereq, as we don't have a
7593
    # feedback_fn there.
7594
    for warn in self.warn:
7595
      feedback_fn("WARNING: %s" % warn)
7596

    
7597
    result = []
7598
    instance = self.instance
7599
    cluster = self.cluster
7600
    # disk changes
7601
    for disk_op, disk_dict in self.op.disks:
7602
      if disk_op == constants.DDM_REMOVE:
7603
        # remove the last disk
7604
        device = instance.disks.pop()
7605
        device_idx = len(instance.disks)
7606
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7607
          self.cfg.SetDiskID(disk, node)
7608
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7609
          if msg:
7610
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7611
                            " continuing anyway", device_idx, node, msg)
7612
        result.append(("disk/%d" % device_idx, "remove"))
7613
      elif disk_op == constants.DDM_ADD:
7614
        # add a new disk
7615
        if instance.disk_template == constants.DT_FILE:
7616
          file_driver, file_path = instance.disks[0].logical_id
7617
          file_path = os.path.dirname(file_path)
7618
        else:
7619
          file_driver = file_path = None
7620
        disk_idx_base = len(instance.disks)
7621
        new_disk = _GenerateDiskTemplate(self,
7622
                                         instance.disk_template,
7623
                                         instance.name, instance.primary_node,
7624
                                         instance.secondary_nodes,
7625
                                         [disk_dict],
7626
                                         file_path,
7627
                                         file_driver,
7628
                                         disk_idx_base)[0]
7629
        instance.disks.append(new_disk)
7630
        info = _GetInstanceInfoText(instance)
7631

    
7632
        logging.info("Creating volume %s for instance %s",
7633
                     new_disk.iv_name, instance.name)
7634
        # Note: this needs to be kept in sync with _CreateDisks
7635
        #HARDCODE
7636
        for node in instance.all_nodes:
7637
          f_create = node == instance.primary_node
7638
          try:
7639
            _CreateBlockDev(self, node, instance, new_disk,
7640
                            f_create, info, f_create)
7641
          except errors.OpExecError, err:
7642
            self.LogWarning("Failed to create volume %s (%s) on"
7643
                            " node %s: %s",
7644
                            new_disk.iv_name, new_disk, node, err)
7645
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7646
                       (new_disk.size, new_disk.mode)))
7647
      else:
7648
        # change a given disk
7649
        instance.disks[disk_op].mode = disk_dict['mode']
7650
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7651
    # NIC changes
7652
    for nic_op, nic_dict in self.op.nics:
7653
      if nic_op == constants.DDM_REMOVE:
7654
        # remove the last nic
7655
        del instance.nics[-1]
7656
        result.append(("nic.%d" % len(instance.nics), "remove"))
7657
      elif nic_op == constants.DDM_ADD:
7658
        # mac and bridge should be set by now
7659
        mac = nic_dict['mac']
7660
        ip = nic_dict.get('ip', None)
7661
        nicparams = self.nic_pinst[constants.DDM_ADD]
7662
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7663
        instance.nics.append(new_nic)
7664
        result.append(("nic.%d" % (len(instance.nics) - 1),
7665
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7666
                       (new_nic.mac, new_nic.ip,
7667
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7668
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7669
                       )))
7670
      else:
7671
        for key in 'mac', 'ip':
7672
          if key in nic_dict:
7673
            setattr(instance.nics[nic_op], key, nic_dict[key])
7674
        if nic_op in self.nic_pnew:
7675
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
7676
        for key, val in nic_dict.iteritems():
7677
          result.append(("nic.%s/%d" % (key, nic_op), val))
7678

    
7679
    # hvparams changes
7680
    if self.op.hvparams:
7681
      instance.hvparams = self.hv_inst
7682
      for key, val in self.op.hvparams.iteritems():
7683
        result.append(("hv/%s" % key, val))
7684

    
7685
    # beparams changes
7686
    if self.op.beparams:
7687
      instance.beparams = self.be_inst
7688
      for key, val in self.op.beparams.iteritems():
7689
        result.append(("be/%s" % key, val))
7690

    
7691
    self.cfg.Update(instance, feedback_fn)
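    # Added note: "result" is a list of (parameter, new value) pairs used for
    # user feedback, e.g.
    #   [("disk/1", "add:size=1024,mode=rw"), ("be/memory", 2048)]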
7692

    
7693
    return result
7694

    
7695

    
7696
class LUQueryExports(NoHooksLU):
7697
  """Query the exports list
7698

7699
  """
7700
  _OP_REQP = ['nodes']
7701
  REQ_BGL = False
7702

    
7703
  def ExpandNames(self):
7704
    self.needed_locks = {}
7705
    self.share_locks[locking.LEVEL_NODE] = 1
7706
    if not self.op.nodes:
7707
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7708
    else:
7709
      self.needed_locks[locking.LEVEL_NODE] = \
7710
        _GetWantedNodes(self, self.op.nodes)
7711

    
7712
  def CheckPrereq(self):
7713
    """Check prerequisites.
7714

7715
    """
7716
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
7717

    
7718
  def Exec(self, feedback_fn):
7719
    """Compute the list of all the exported system images.
7720

7721
    @rtype: dict
7722
    @return: a dictionary with the structure node->(export-list)
7723
        where export-list is a list of the instances exported on
7724
        that node.
7725

7726
    """
7727
    rpcresult = self.rpc.call_export_list(self.nodes)
7728
    result = {}
7729
    for node in rpcresult:
7730
      if rpcresult[node].fail_msg:
7731
        result[node] = False
7732
      else:
7733
        result[node] = rpcresult[node].payload
7734

    
7735
    return result
7736

    
7737

    
7738
class LUExportInstance(LogicalUnit):
7739
  """Export an instance to an image in the cluster.
7740

7741
  """
7742
  HPATH = "instance-export"
7743
  HTYPE = constants.HTYPE_INSTANCE
7744
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
7745
  REQ_BGL = False
7746

    
7747
  def CheckArguments(self):
7748
    """Check the arguments.
7749

7750
    """
7751
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
7752
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
7753

    
7754
  def ExpandNames(self):
7755
    self._ExpandAndLockInstance()
7756
    # FIXME: lock only instance primary and destination node
7757
    #
7758
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
7760
    # remove it from its current node. In the future we could fix this by:
7761
    #  - making a tasklet to search (share-lock all), then create the new one,
7762
    #    then one to remove, after
7763
    #  - removing the removal operation altogether
7764
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7765

    
7766
  def DeclareLocks(self, level):
7767
    """Last minute lock declaration."""
7768
    # All nodes are locked anyway, so nothing to do here.
7769

    
7770
  def BuildHooksEnv(self):
7771
    """Build hooks env.
7772

7773
    This will run on the master, primary node and target node.
7774

7775
    """
7776
    env = {
7777
      "EXPORT_NODE": self.op.target_node,
7778
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
7779
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
7780
      }
7781
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7782
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
7783
          self.op.target_node]
7784
    return env, nl, nl
7785

    
7786
  def CheckPrereq(self):
7787
    """Check prerequisites.
7788

7789
    This checks that the instance and node names are valid.
7790

7791
    """
7792
    instance_name = self.op.instance_name
7793
    self.instance = self.cfg.GetInstanceInfo(instance_name)
7794
    assert self.instance is not None, \
7795
          "Cannot retrieve locked instance %s" % self.op.instance_name
7796
    _CheckNodeOnline(self, self.instance.primary_node)
7797

    
7798
    self.dst_node = self.cfg.GetNodeInfo(
7799
      self.cfg.ExpandNodeName(self.op.target_node))
7800

    
7801
    if self.dst_node is None:
7802
      # This means the node name is wrong, not that the node is not locked
7803
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
7804
    _CheckNodeOnline(self, self.dst_node.name)
7805
    _CheckNodeNotDrained(self, self.dst_node.name)
7806

    
7807
    # instance disk type verification
7808
    for disk in self.instance.disks:
7809
      if disk.dev_type == constants.LD_FILE:
7810
        raise errors.OpPrereqError("Export not supported for instances with"
7811
                                   " file-based disks")
7812

    
7813
  def Exec(self, feedback_fn):
7814
    """Export an instance to an image in the cluster.
7815

7816
    """
7817
    instance = self.instance
7818
    dst_node = self.dst_node
7819
    src_node = instance.primary_node
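    # Added overview comment: the export below proceeds roughly as
    #   1. optionally shut the instance down,
    #   2. take an LVM snapshot of every disk on the source node,
    #   3. restart the instance if it was shut down here and was marked up,
    #   4. copy each snapshot to the target node and remove the snapshot,
    #   5. finalize the export on the target and prune older exports of the
    #      same instance on the other nodes.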
7820

    
7821
    if self.op.shutdown:
7822
      # shutdown the instance, but not the disks
7823
      feedback_fn("Shutting down instance %s" % instance.name)
7824
      result = self.rpc.call_instance_shutdown(src_node, instance,
7825
                                               self.shutdown_timeout)
7826
      result.Raise("Could not shutdown instance %s on"
7827
                   " node %s" % (instance.name, src_node))
7828

    
7829
    vgname = self.cfg.GetVGName()
7830

    
7831
    snap_disks = []
7832

    
7833
    # set the disks ID correctly since call_instance_start needs the
7834
    # correct drbd minor to create the symlinks
7835
    for disk in instance.disks:
7836
      self.cfg.SetDiskID(disk, src_node)
7837

    
7838
    # per-disk results
7839
    dresults = []
7840
    try:
7841
      for idx, disk in enumerate(instance.disks):
7842
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
7843
                    (idx, src_node))
7844

    
7845
        # result.payload will be a snapshot of an lvm leaf of the one we passed
7846
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
7847
        msg = result.fail_msg
7848
        if msg:
7849
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
7850
                          idx, src_node, msg)
7851
          snap_disks.append(False)
7852
        else:
7853
          disk_id = (vgname, result.payload)
7854
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
7855
                                 logical_id=disk_id, physical_id=disk_id,
7856
                                 iv_name=disk.iv_name)
7857
          snap_disks.append(new_dev)
7858

    
7859
    finally:
7860
      if self.op.shutdown and instance.admin_up:
7861
        feedback_fn("Starting instance %s" % instance.name)
7862
        result = self.rpc.call_instance_start(src_node, instance, None, None)
7863
        msg = result.fail_msg
7864
        if msg:
7865
          _ShutdownInstanceDisks(self, instance)
7866
          raise errors.OpExecError("Could not start instance: %s" % msg)
7867

    
7868
    # TODO: check for size
7869

    
7870
    cluster_name = self.cfg.GetClusterName()
7871
    for idx, dev in enumerate(snap_disks):
7872
      feedback_fn("Exporting snapshot %s from %s to %s" %
7873
                  (idx, src_node, dst_node.name))
7874
      if dev:
7875
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
7876
                                               instance, cluster_name, idx)
7877
        msg = result.fail_msg
7878
        if msg:
7879
          self.LogWarning("Could not export disk/%s from node %s to"
7880
                          " node %s: %s", idx, src_node, dst_node.name, msg)
7881
          dresults.append(False)
7882
        else:
7883
          dresults.append(True)
7884
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
7885
        if msg:
7886
          self.LogWarning("Could not remove snapshot for disk/%d from node"
7887
                          " %s: %s", idx, src_node, msg)
7888
      else:
7889
        dresults.append(False)
7890

    
7891
    feedback_fn("Finalizing export on %s" % dst_node.name)
7892
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
7893
    fin_resu = True
7894
    msg = result.fail_msg
7895
    if msg:
7896
      self.LogWarning("Could not finalize export for instance %s"
7897
                      " on node %s: %s", instance.name, dst_node.name, msg)
7898
      fin_resu = False
7899

    
7900
    nodelist = self.cfg.GetNodeList()
7901
    nodelist.remove(dst_node.name)
7902

    
7903
    # on one-node clusters nodelist will be empty after the removal
7904
    # if we proceed, the backup would be removed because OpQueryExports
7905
    # substitutes an empty list with the full cluster node list.
7906
    iname = instance.name
7907
    if nodelist:
7908
      feedback_fn("Removing old exports for instance %s" % iname)
7909
      exportlist = self.rpc.call_export_list(nodelist)
7910
      for node in exportlist:
7911
        if exportlist[node].fail_msg:
7912
          continue
7913
        if iname in exportlist[node].payload:
7914
          msg = self.rpc.call_export_remove(node, iname).fail_msg
7915
          if msg:
7916
            self.LogWarning("Could not remove older export for instance %s"
7917
                            " on node %s: %s", iname, node, msg)
7918
    return fin_resu, dresults
7919

    
7920

    
7921
class LURemoveExport(NoHooksLU):
7922
  """Remove exports related to the named instance.
7923

7924
  """
7925
  _OP_REQP = ["instance_name"]
7926
  REQ_BGL = False
7927

    
7928
  def ExpandNames(self):
7929
    self.needed_locks = {}
7930
    # We need all nodes to be locked in order for RemoveExport to work, but we
7931
    # don't need to lock the instance itself, as nothing will happen to it (and
7932
    # we can remove exports also for a removed instance)
7933
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7934

    
7935
  def CheckPrereq(self):
7936
    """Check prerequisites.
7937
    """
7938
    pass
7939

    
7940
  def Exec(self, feedback_fn):
7941
    """Remove any export.
7942

7943
    """
7944
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
7945
    # If the instance was not found we'll try with the name that was passed in.
7946
    # This will only work if it was an FQDN, though.
7947
    fqdn_warn = False
7948
    if not instance_name:
7949
      fqdn_warn = True
7950
      instance_name = self.op.instance_name
7951

    
7952
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7953
    exportlist = self.rpc.call_export_list(locked_nodes)
7954
    found = False
7955
    for node in exportlist:
7956
      msg = exportlist[node].fail_msg
7957
      if msg:
7958
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
7959
        continue
7960
      if instance_name in exportlist[node].payload:
7961
        found = True
7962
        result = self.rpc.call_export_remove(node, instance_name)
7963
        msg = result.fail_msg
7964
        if msg:
7965
          logging.error("Could not remove export for instance %s"
7966
                        " on node %s: %s", instance_name, node, msg)
7967

    
7968
    if fqdn_warn and not found:
7969
      feedback_fn("Export not found. If trying to remove an export belonging"
7970
                  " to a deleted instance please use its Fully Qualified"
7971
                  " Domain Name.")
7972

    
7973

    
7974
class TagsLU(NoHooksLU):
7975
  """Generic tags LU.
7976

7977
  This is an abstract class which is the parent of all the other tags LUs.
7978

7979
  """
7980

    
7981
  def ExpandNames(self):
7982
    self.needed_locks = {}
7983
    if self.op.kind == constants.TAG_NODE:
7984
      name = self.cfg.ExpandNodeName(self.op.name)
7985
      if name is None:
7986
        raise errors.OpPrereqError("Invalid node name (%s)" %
7987
                                   (self.op.name,))
7988
      self.op.name = name
7989
      self.needed_locks[locking.LEVEL_NODE] = name
7990
    elif self.op.kind == constants.TAG_INSTANCE:
7991
      name = self.cfg.ExpandInstanceName(self.op.name)
7992
      if name is None:
7993
        raise errors.OpPrereqError("Invalid instance name (%s)" %
7994
                                   (self.op.name,))
7995
      self.op.name = name
7996
      self.needed_locks[locking.LEVEL_INSTANCE] = name
7997

    
7998
  def CheckPrereq(self):
7999
    """Check prerequisites.
8000

8001
    """
8002
    if self.op.kind == constants.TAG_CLUSTER:
8003
      self.target = self.cfg.GetClusterInfo()
8004
    elif self.op.kind == constants.TAG_NODE:
8005
      self.target = self.cfg.GetNodeInfo(self.op.name)
8006
    elif self.op.kind == constants.TAG_INSTANCE:
8007
      self.target = self.cfg.GetInstanceInfo(self.op.name)
8008
    else:
8009
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8010
                                 str(self.op.kind))
8011

    
8012

    
8013
class LUGetTags(TagsLU):
8014
  """Returns the tags of a given object.
8015

8016
  """
8017
  _OP_REQP = ["kind", "name"]
8018
  REQ_BGL = False
8019

    
8020
  def Exec(self, feedback_fn):
8021
    """Returns the tag list.
8022

8023
    """
8024
    return list(self.target.GetTags())
8025

    
8026

    
8027
class LUSearchTags(NoHooksLU):
8028
  """Searches the tags for a given pattern.
8029

8030
  """
8031
  _OP_REQP = ["pattern"]
8032
  REQ_BGL = False
8033

    
8034
  def ExpandNames(self):
8035
    self.needed_locks = {}
8036

    
8037
  def CheckPrereq(self):
8038
    """Check prerequisites.
8039

8040
    This checks the pattern passed for validity by compiling it.
8041

8042
    """
8043
    try:
8044
      self.re = re.compile(self.op.pattern)
8045
    except re.error, err:
8046
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8047
                                 (self.op.pattern, err))
8048

    
8049
  def Exec(self, feedback_fn):
8050
    """Returns the tag list.
8051

8052
    """
8053
    cfg = self.cfg
8054
    tgts = [("/cluster", cfg.GetClusterInfo())]
8055
    ilist = cfg.GetAllInstancesInfo().values()
8056
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8057
    nlist = cfg.GetAllNodesInfo().values()
8058
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8059
    results = []
8060
    for path, target in tgts:
8061
      for tag in target.GetTags():
8062
        if self.re.search(tag):
8063
          results.append((path, tag))
8064
    return results
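    # Added note: each result element is a (path, tag) pair, e.g.
    # ("/instances/inst1.example.com", "staging") or ("/cluster", "prod").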
8065

    
8066

    
8067
class LUAddTags(TagsLU):
8068
  """Sets a tag on a given object.
8069

8070
  """
8071
  _OP_REQP = ["kind", "name", "tags"]
8072
  REQ_BGL = False
8073

    
8074
  def CheckPrereq(self):
8075
    """Check prerequisites.
8076

8077
    This checks the type and length of the tag name and value.
8078

8079
    """
8080
    TagsLU.CheckPrereq(self)
8081
    for tag in self.op.tags:
8082
      objects.TaggableObject.ValidateTag(tag)
8083

    
8084
  def Exec(self, feedback_fn):
8085
    """Sets the tag.
8086

8087
    """
8088
    try:
8089
      for tag in self.op.tags:
8090
        self.target.AddTag(tag)
8091
    except errors.TagError, err:
8092
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
8093
    try:
8094
      self.cfg.Update(self.target, feedback_fn)
8095
    except errors.ConfigurationError:
8096
      raise errors.OpRetryError("There has been a modification to the"
8097
                                " config file and the operation has been"
8098
                                " aborted. Please retry.")
8099

    
8100

    
8101
class LUDelTags(TagsLU):
8102
  """Delete a list of tags from a given object.
8103

8104
  """
8105
  _OP_REQP = ["kind", "name", "tags"]
8106
  REQ_BGL = False
8107

    
8108
  def CheckPrereq(self):
8109
    """Check prerequisites.
8110

8111
    This checks that we have the given tag.
8112

8113
    """
8114
    TagsLU.CheckPrereq(self)
8115
    for tag in self.op.tags:
8116
      objects.TaggableObject.ValidateTag(tag)
8117
    del_tags = frozenset(self.op.tags)
8118
    cur_tags = self.target.GetTags()
8119
    if not del_tags <= cur_tags:
8120
      diff_tags = del_tags - cur_tags
8121
      diff_names = ["'%s'" % tag for tag in diff_tags]
8122
      diff_names.sort()
8123
      raise errors.OpPrereqError("Tag(s) %s not found" %
8124
                                 (",".join(diff_names)))
8125

    
8126
  def Exec(self, feedback_fn):
8127
    """Remove the tag from the object.
8128

8129
    """
8130
    for tag in self.op.tags:
8131
      self.target.RemoveTag(tag)
8132
    try:
8133
      self.cfg.Update(self.target, feedback_fn)
8134
    except errors.ConfigurationError:
8135
      raise errors.OpRetryError("There has been a modification to the"
8136
                                " config file and the operation has been"
8137
                                " aborted. Please retry.")
8138

    
8139

    
8140
class LUTestDelay(NoHooksLU):
8141
  """Sleep for a specified amount of time.
8142

8143
  This LU sleeps on the master and/or nodes for a specified amount of
8144
  time.
8145

8146
  """
8147
  _OP_REQP = ["duration", "on_master", "on_nodes"]
8148
  REQ_BGL = False
8149

    
8150
  def ExpandNames(self):
8151
    """Expand names and set required locks.
8152

8153
    This expands the node list, if any.
8154

8155
    """
8156
    self.needed_locks = {}
8157
    if self.op.on_nodes:
8158
      # _GetWantedNodes can be used here, but is not always appropriate to use
8159
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8160
      # more information.
8161
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8162
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8163

    
8164
  def CheckPrereq(self):
8165
    """Check prerequisites.
8166

8167
    """
8168

    
8169
  def Exec(self, feedback_fn):
8170
    """Do the actual sleep.
8171

8172
    """
8173
    if self.op.on_master:
8174
      if not utils.TestDelay(self.op.duration):
8175
        raise errors.OpExecError("Error during master delay test")
8176
    if self.op.on_nodes:
8177
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8178
      for node, node_result in result.items():
8179
        node_result.Raise("Failure during rpc call to node %s" % node)
8180

    
8181

    
8182
class IAllocator(object):
8183
  """IAllocator framework.
8184

8185
  An IAllocator instance has several sets of attributes:
8186
    - cfg that is needed to query the cluster
8187
    - input data (all members of the _KEYS class attribute are required)
8188
    - four buffer attributes (in|out_data|text), that represent the
8189
      input (to the external script) in text and data structure format,
8190
      and the output from it, again in two formats
8191
    - the result variables from the script (success, info, nodes) for
8192
      easy usage
8193

8194
  """
8195
  _ALLO_KEYS = [
8196
    "mem_size", "disks", "disk_template",
8197
    "os", "tags", "nics", "vcpus", "hypervisor",
8198
    ]
8199
  _RELO_KEYS = [
8200
    "relocate_from",
8201
    ]
8202

    
8203
  def __init__(self, cfg, rpc, mode, name, **kwargs):
8204
    self.cfg = cfg
8205
    self.rpc = rpc
8206
    # init buffer variables
8207
    self.in_text = self.out_text = self.in_data = self.out_data = None
8208
    # init all input fields so that pylint is happy
8209
    self.mode = mode
8210
    self.name = name
8211
    self.mem_size = self.disks = self.disk_template = None
8212
    self.os = self.tags = self.nics = self.vcpus = None
8213
    self.hypervisor = None
8214
    self.relocate_from = None
8215
    # computed fields
8216
    self.required_nodes = None
8217
    # init result fields
8218
    self.success = self.info = self.nodes = None
8219
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8220
      keyset = self._ALLO_KEYS
8221
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8222
      keyset = self._RELO_KEYS
8223
    else:
8224
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8225
                                   " IAllocator" % self.mode)
8226
    for key in kwargs:
8227
      if key not in keyset:
8228
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
8229
                                     " IAllocator" % key)
8230
      setattr(self, key, kwargs[key])
8231
    for key in keyset:
8232
      if key not in kwargs:
8233
        raise errors.ProgrammerError("Missing input parameter '%s' to"
8234
                                     " IAllocator" % key)
8235
    self._BuildInputData()
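    # Construction sketch (illustrative, not from the original source): in
    # allocate mode every key in _ALLO_KEYS has to be supplied, e.g.
    #   IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
    #              "inst1.example.com", mem_size=512,
    #              disks=[{"size": 1024, "mode": "rw"}],
    #              disk_template=constants.DT_DRBD8, os="debootstrap",
    #              tags=[], nics=[{"mac": constants.VALUE_AUTO}], vcpus=1,
    #              hypervisor=constants.HT_XEN_PVM)
    # while relocate mode only takes relocate_from (a list of node names).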
8236

    
8237
  def _ComputeClusterData(self):
8238
    """Compute the generic allocator input data.
8239

8240
    This is the data that is independent of the actual operation.
8241

8242
    """
8243
    cfg = self.cfg
8244
    cluster_info = cfg.GetClusterInfo()
8245
    # cluster data
8246
    data = {
8247
      "version": constants.IALLOCATOR_VERSION,
8248
      "cluster_name": cfg.GetClusterName(),
8249
      "cluster_tags": list(cluster_info.GetTags()),
8250
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8251
      # we don't have job IDs
8252
      }
8253
    iinfo = cfg.GetAllInstancesInfo().values()
8254
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8255

    
8256
    # node data
8257
    node_results = {}
8258
    node_list = cfg.GetNodeList()
8259

    
8260
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8261
      hypervisor_name = self.hypervisor
8262
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8263
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8264

    
8265
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8266
                                        hypervisor_name)
8267
    node_iinfo = \
8268
      self.rpc.call_all_instances_info(node_list,
8269
                                       cluster_info.enabled_hypervisors)
8270
    for nname, nresult in node_data.items():
8271
      # first fill in static (config-based) values
8272
      ninfo = cfg.GetNodeInfo(nname)
8273
      pnr = {
8274
        "tags": list(ninfo.GetTags()),
8275
        "primary_ip": ninfo.primary_ip,
8276
        "secondary_ip": ninfo.secondary_ip,
8277
        "offline": ninfo.offline,
8278
        "drained": ninfo.drained,
8279
        "master_candidate": ninfo.master_candidate,
8280
        }
8281

    
8282
      if not (ninfo.offline or ninfo.drained):
8283
        nresult.Raise("Can't get data for node %s" % nname)
8284
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8285
                                nname)
8286
        remote_info = nresult.payload
8287

    
8288
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
8289
                     'vg_size', 'vg_free', 'cpu_total']:
8290
          if attr not in remote_info:
8291
            raise errors.OpExecError("Node '%s' didn't return attribute"
8292
                                     " '%s'" % (nname, attr))
8293
          if not isinstance(remote_info[attr], int):
8294
            raise errors.OpExecError("Node '%s' returned invalid value"
8295
                                     " for '%s': %s" %
8296
                                     (nname, attr, remote_info[attr]))
8297
        # compute memory used by primary instances
8298
        i_p_mem = i_p_up_mem = 0
8299
        for iinfo, beinfo in i_list:
8300
          if iinfo.primary_node == nname:
8301
            i_p_mem += beinfo[constants.BE_MEMORY]
8302
            if iinfo.name not in node_iinfo[nname].payload:
8303
              i_used_mem = 0
8304
            else:
8305
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8306
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8307
            remote_info['memory_free'] -= max(0, i_mem_diff)
8308

    
8309
            if iinfo.admin_up:
8310
              i_p_up_mem += beinfo[constants.BE_MEMORY]
8311

    
8312
        # compute memory used by instances
8313
        pnr_dyn = {
8314
          "total_memory": remote_info['memory_total'],
8315
          "reserved_memory": remote_info['memory_dom0'],
8316
          "free_memory": remote_info['memory_free'],
8317
          "total_disk": remote_info['vg_size'],
8318
          "free_disk": remote_info['vg_free'],
8319
          "total_cpus": remote_info['cpu_total'],
8320
          "i_pri_memory": i_p_mem,
8321
          "i_pri_up_memory": i_p_up_mem,
8322
          }
8323
        pnr.update(pnr_dyn)
8324

    
8325
      node_results[nname] = pnr
8326
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)
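
    # Note: the serialized input therefore combines the cluster-wide data
    # gathered by _ComputeClusterData (top-level keys such as "nodes" and
    # "instances") with the "request" dict built by one of the two helpers
    # above.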

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict
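
    # Illustrative sketch (example values only): a well-formed allocator
    # reply parses into something like:
    #   {"success": True, "info": "allocation successful",
    #    "nodes": ["node1.example.com", "node2.example.com"]}
    # Only the presence and basic types of these keys are enforced above;
    # the meaning of "nodes" depends on the request type.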


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the requested direction
    and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result