#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have the 'GANETI_' prefix, as this will
    be handled in the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes, an empty list (and not None) should be returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instances' nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


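# NOTE: The class below is an illustrative sketch added during editing and is
# not part of the original module. It shows how a concurrent, hook-less LU
# typically combines _ExpandAndLockInstance, DeclareLocks and
# _LockInstancesNodes, assuming an opcode with an 'instance_name' parameter.
class _LUExampleInstanceSketch(NoHooksLU):
  """Example-only LU demonstrating the locking helpers."""
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # expand the instance name and declare the instance-level lock
    self._ExpandAndLockInstance()
    # node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    # a real LU would verify cluster state here; the sketch just looks the
    # instance up in the configuration
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def Exec(self, feedback_fn):
    # a real LU would do its actual work here
    feedback_fn("Instance %s has primary node %s" %
                (self.instance.name, self.instance.primary_node))

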
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


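# NOTE: Illustrative sketch, not part of the original module: a trivial
# tasklet plus an LU that delegates to it via self.tasklets, relying on the
# default LogicalUnit.CheckPrereq/Exec loops defined above.
class _ExampleNoopTasklet(Tasklet):
  """Example-only tasklet that only emits a feedback message."""
  def CheckPrereq(self):
    # nothing to verify for this no-op example
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Example tasklet ran on cluster %s" %
                self.cfg.GetClusterName())


class _LUExampleTaskletSketch(NoHooksLU):
  """Example-only LU built entirely from tasklets."""
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.tasklets = [_ExampleNoopTasklet(self)]

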
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


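# NOTE: Illustrative sketch, not part of the original module: how an
# instance-level LU typically feeds _BuildInstanceHookEnvByObject into
# BuildHooksEnv. The HPATH value "instance-example" and the self.instance
# attribute (filled in by CheckPrereq) are assumptions made for this example.
class _LUExampleHooksSketch(LogicalUnit):
  """Example-only LU showing hook environment construction."""
  HPATH = "instance-example"
  HTYPE = constants.HTYPE_INSTANCE  # assumed, as used by instance LUs
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def BuildHooksEnv(self):
    # environment derived from the instance object, with the master node and
    # the instance's own nodes as the pre/post hook node lists
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def Exec(self, feedback_fn):
    feedback_fn("Example only - no work done for %s" % self.instance.name)

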
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant")

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant")


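# NOTE: Illustrative sketch, not part of the original module: a helper showing
# how _CheckOSVariant is typically used after resolving an OS on a node. The
# call_os_get RPC and its payload being an objects.OS instance are assumptions
# based on how instance LUs use this check elsewhere.
def _ExampleValidateOSName(lu, pnode, os_name):
  """Example-only helper: resolve an OS on a node and validate its variant."""
  result = lu.rpc.call_os_get(pnode, os_name)  # assumed RPC, see note above
  result.Raise("OS '%s' not provided by node %s" % (os_name, pnode),
               prereq=True)
  # e.g. an os_name of "debootstrap+lenny" selects the variant "lenny"
  _CheckOSVariant(result.payload, os_name)

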
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

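  # NOTE (editing example, not part of the original module): for
  # ecode=self.ENODELVM, item "node1.example.com" and msg "volume group
  # missing", _Error reports via feedback_fn either
  #   "  - ERROR:ENODELVM:node:node1.example.com:volume group missing"
  # when the opcode's error_codes parameter is set, or
  #   "  - ERROR: node node1.example.com: volume group missing"
  # otherwise.
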
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in the
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if node != node_current:
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all the instances it is secondary for, should a
      # single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase, and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist)")
      if test:
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(nodeinfo, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  This is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes", code="WARNING")

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               ", ".join(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        show_node_header = True
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if test:
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 1

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


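# Example sketch (hypothetical names, not from the original code): the tuple
# returned by LUVerifyDisks.Exec above.  The first element maps nodes whose
# LV listing RPC failed to the error message, the second lists instances
# that need "activate-disks", and the third maps instances to the
# (node, volume) pairs that could not be found:
#
#   ({"node3.example.com": "Error while running lvs: ..."},
#    ["instance1.example.com"],
#    {"instance2.example.com": [("node1.example.com", "xenvg/disk0_data")]})

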
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


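# Example sketch (hypothetical, not from the original code): how
# _RecursiveCheckIfLVMBased above classifies a DRBD8 disk whose children are
# plain LVs; the DRBD8 device itself is not LD_LV, but the check succeeds as
# soon as one of its children is:
#
#   data = objects.Disk(dev_type=constants.LD_LV, size=1024)
#   meta = objects.Disk(dev_type=constants.LD_LV, size=128)
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=1024,
#                       children=[data, meta])
#   _RecursiveCheckIfLVMBased(drbd)   # True, via the LV children

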
class LUSetClusterParams(LogicalUnit):
1840
  """Change the parameters of the cluster.
1841

1842
  """
1843
  HPATH = "cluster-modify"
1844
  HTYPE = constants.HTYPE_CLUSTER
1845
  _OP_REQP = []
1846
  REQ_BGL = False
1847

    
1848
  def CheckArguments(self):
1849
    """Check parameters
1850

1851
    """
1852
    if not hasattr(self.op, "candidate_pool_size"):
1853
      self.op.candidate_pool_size = None
1854
    if self.op.candidate_pool_size is not None:
1855
      try:
1856
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1857
      except (ValueError, TypeError), err:
1858
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1859
                                   str(err))
1860
      if self.op.candidate_pool_size < 1:
1861
        raise errors.OpPrereqError("At least one master candidate needed")
1862

    
1863
  def ExpandNames(self):
1864
    # FIXME: in the future maybe other cluster params won't require checking on
1865
    # all nodes to be modified.
1866
    self.needed_locks = {
1867
      locking.LEVEL_NODE: locking.ALL_SET,
1868
    }
1869
    self.share_locks[locking.LEVEL_NODE] = 1
1870

    
1871
  def BuildHooksEnv(self):
1872
    """Build hooks env.
1873

1874
    """
1875
    env = {
1876
      "OP_TARGET": self.cfg.GetClusterName(),
1877
      "NEW_VG_NAME": self.op.vg_name,
1878
      }
1879
    mn = self.cfg.GetMasterNode()
1880
    return env, [mn], [mn]
1881

    
1882
  def CheckPrereq(self):
1883
    """Check prerequisites.
1884

1885
    This checks whether the given params don't conflict and
1886
    if the given volume group is valid.
1887

1888
    """
1889
    if self.op.vg_name is not None and not self.op.vg_name:
1890
      instances = self.cfg.GetAllInstancesInfo().values()
1891
      for inst in instances:
1892
        for disk in inst.disks:
1893
          if _RecursiveCheckIfLVMBased(disk):
1894
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1895
                                       " lvm-based instances exist")
1896

    
1897
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1898

    
1899
    # if vg_name not None, checks given volume group on all nodes
1900
    if self.op.vg_name:
1901
      vglist = self.rpc.call_vg_list(node_list)
1902
      for node in node_list:
1903
        msg = vglist[node].fail_msg
1904
        if msg:
1905
          # ignoring down node
1906
          self.LogWarning("Error while gathering data on node %s"
1907
                          " (ignoring node): %s", node, msg)
1908
          continue
1909
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1910
                                              self.op.vg_name,
1911
                                              constants.MIN_VG_SIZE)
1912
        if vgstatus:
1913
          raise errors.OpPrereqError("Error on node '%s': %s" %
1914
                                     (node, vgstatus))
1915

    
1916
    self.cluster = cluster = self.cfg.GetClusterInfo()
1917
    # validate params changes
1918
    if self.op.beparams:
1919
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1920
      self.new_beparams = objects.FillDict(
1921
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1922

    
1923
    if self.op.nicparams:
1924
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1925
      self.new_nicparams = objects.FillDict(
1926
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1927
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1928

    
1929
    # hypervisor list/parameters
1930
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1931
    if self.op.hvparams:
1932
      if not isinstance(self.op.hvparams, dict):
1933
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1934
      for hv_name, hv_dict in self.op.hvparams.items():
1935
        if hv_name not in self.new_hvparams:
1936
          self.new_hvparams[hv_name] = hv_dict
1937
        else:
1938
          self.new_hvparams[hv_name].update(hv_dict)
1939

    
1940
    if self.op.enabled_hypervisors is not None:
1941
      self.hv_list = self.op.enabled_hypervisors
1942
      if not self.hv_list:
1943
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1944
                                   " least one member")
1945
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1946
      if invalid_hvs:
1947
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" % ", ".join(invalid_hvs))
1949
    else:
1950
      self.hv_list = cluster.enabled_hypervisors
1951

    
1952
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1953
      # either the enabled list has changed, or the parameters have, validate
1954
      for hv_name, hv_params in self.new_hvparams.items():
1955
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1956
            (self.op.enabled_hypervisors and
1957
             hv_name in self.op.enabled_hypervisors)):
1958
          # either this is a new hypervisor, or its parameters have changed
1959
          hv_class = hypervisor.GetHypervisor(hv_name)
1960
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1961
          hv_class.CheckParameterSyntax(hv_params)
1962
          _CheckHVParams(self, node_list, hv_name, hv_params)
1963

    
1964
  def Exec(self, feedback_fn):
1965
    """Change the parameters of the cluster.
1966

1967
    """
1968
    if self.op.vg_name is not None:
1969
      new_volume = self.op.vg_name
1970
      if not new_volume:
1971
        new_volume = None
1972
      if new_volume != self.cfg.GetVGName():
1973
        self.cfg.SetVGName(new_volume)
1974
      else:
1975
        feedback_fn("Cluster LVM configuration already in desired"
1976
                    " state, not changing")
1977
    if self.op.hvparams:
1978
      self.cluster.hvparams = self.new_hvparams
1979
    if self.op.enabled_hypervisors is not None:
1980
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1981
    if self.op.beparams:
1982
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1983
    if self.op.nicparams:
1984
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1985

    
1986
    if self.op.candidate_pool_size is not None:
1987
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1988
      # we need to update the pool size here, otherwise the save will fail
1989
      _AdjustCandidatePool(self, [])
1990

    
1991
    self.cfg.Update(self.cluster, feedback_fn)
1992

    
1993

    
1994
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1995
  """Distribute additional files which are part of the cluster configuration.
1996

1997
  ConfigWriter takes care of distributing the config and ssconf files, but
1998
  there are more files which should be distributed to all nodes. This function
1999
  makes sure those are copied.
2000

2001
  @param lu: calling logical unit
2002
  @param additional_nodes: list of nodes not in the config to distribute to
2003

2004
  """
2005
  # 1. Gather target nodes
2006
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2007
  dist_nodes = lu.cfg.GetNodeList()
2008
  if additional_nodes is not None:
2009
    dist_nodes.extend(additional_nodes)
2010
  if myself.name in dist_nodes:
2011
    dist_nodes.remove(myself.name)
2012

    
2013
  # 2. Gather files to distribute
2014
  dist_files = set([constants.ETC_HOSTS,
2015
                    constants.SSH_KNOWN_HOSTS_FILE,
2016
                    constants.RAPI_CERT_FILE,
2017
                    constants.RAPI_USERS_FILE,
2018
                    constants.HMAC_CLUSTER_KEY,
2019
                   ])
2020

    
2021
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2022
  for hv_name in enabled_hypervisors:
2023
    hv_class = hypervisor.GetHypervisor(hv_name)
2024
    dist_files.update(hv_class.GetAncillaryFiles())
2025

    
2026
  # 3. Perform the files upload
2027
  for fname in dist_files:
2028
    if os.path.exists(fname):
2029
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2030
      for to_node, to_result in result.items():
2031
        msg = to_result.fail_msg
2032
        if msg:
2033
          msg = ("Copy of file %s to node %s failed: %s" %
2034
                 (fname, to_node, msg))
2035
          lu.proc.LogWarning(msg)
2036
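
# Example sketch (not from the original code): logical units call the helper
# above after changing anything that lives outside the main configuration
# file, for example from an Exec() method:
#
#   _RedistributeAncillaryFiles(self)
#   _RedistributeAncillaryFiles(self, additional_nodes=["node9.example.com"])
#
# The second form is the one LUAddNode uses, since a newly added node is not
# yet part of the configuration when the files have to be copied to it.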

    
2037

    
2038
class LURedistributeConfig(NoHooksLU):
2039
  """Force the redistribution of cluster configuration.
2040

2041
  This is a very simple LU.
2042

2043
  """
2044
  _OP_REQP = []
2045
  REQ_BGL = False
2046

    
2047
  def ExpandNames(self):
2048
    self.needed_locks = {
2049
      locking.LEVEL_NODE: locking.ALL_SET,
2050
    }
2051
    self.share_locks[locking.LEVEL_NODE] = 1
2052

    
2053
  def CheckPrereq(self):
2054
    """Check prerequisites.
2055

2056
    """
2057

    
2058
  def Exec(self, feedback_fn):
2059
    """Redistribute the configuration.
2060

2061
    """
2062
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2063
    _RedistributeAncillaryFiles(self)
2064
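
# Example sketch (assumptions, not from the original code): LURedistributeConfig
# is normally reached through the cluster client API; assuming the matching
# opcode is opcodes.OpRedistributeConfig, a job would be submitted roughly
# like this:
#
#   from ganeti import luxi
#   from ganeti import opcodes
#
#   cl = luxi.Client()
#   job_id = cl.SubmitJob([opcodes.OpRedistributeConfig()])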

    
2065

    
2066
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
2067
  """Sleep and poll for an instance's disk to sync.
2068

2069
  """
2070
  if not instance.disks:
2071
    return True
2072

    
2073
  if not oneshot:
2074
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2075

    
2076
  node = instance.primary_node
2077

    
2078
  for dev in instance.disks:
2079
    lu.cfg.SetDiskID(dev, node)
2080

    
2081
  retries = 0
2082
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2083
  while True:
2084
    max_time = 0
2085
    done = True
2086
    cumul_degraded = False
2087
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2088
    msg = rstats.fail_msg
2089
    if msg:
2090
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2091
      retries += 1
2092
      if retries >= 10:
2093
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2094
                                 " aborting." % node)
2095
      time.sleep(6)
2096
      continue
2097
    rstats = rstats.payload
2098
    retries = 0
2099
    for i, mstat in enumerate(rstats):
2100
      if mstat is None:
2101
        lu.LogWarning("Can't compute data for node %s/%s",
2102
                           node, instance.disks[i].iv_name)
2103
        continue
2104

    
2105
      cumul_degraded = (cumul_degraded or
2106
                        (mstat.is_degraded and mstat.sync_percent is None))
2107
      if mstat.sync_percent is not None:
2108
        done = False
2109
        if mstat.estimated_time is not None:
2110
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2111
          max_time = mstat.estimated_time
2112
        else:
2113
          rem_time = "no time estimate"
2114
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2115
                        (instance.disks[i].iv_name, mstat.sync_percent,
2116
                         rem_time))
2117

    
2118
    # if we're done but degraded, let's do a few small retries, to
2119
    # make sure we see a stable and not transient situation; therefore
2120
    # we force restart of the loop
2121
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2122
      logging.info("Degraded disks found, %d retries left", degr_retries)
2123
      degr_retries -= 1
2124
      time.sleep(1)
2125
      continue
2126

    
2127
    if done or oneshot:
2128
      break
2129

    
2130
    time.sleep(min(60, max_time))
2131

    
2132
  if done:
2133
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2134
  return not cumul_degraded
2135
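
# Example sketch (not from the original code): callers typically poll via
# _WaitForSync until the mirrors are clean and treat a False result (still
# degraded after the retries above) as fatal, along the lines of:
#
#   disk_abort = not _WaitForSync(self, instance)
#   if disk_abort:
#     raise errors.OpExecError("There are some degraded disks for"
#                              " this instance")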

    
2136

    
2137
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2138
  """Check that mirrors are not degraded.
2139

2140
  The ldisk parameter, if True, will change the test from the
2141
  is_degraded attribute (which represents overall non-ok status for
2142
  the device(s)) to the ldisk (representing the local storage status).
2143

2144
  """
2145
  lu.cfg.SetDiskID(dev, node)
2146

    
2147
  result = True
2148

    
2149
  if on_primary or dev.AssembleOnSecondary():
2150
    rstats = lu.rpc.call_blockdev_find(node, dev)
2151
    msg = rstats.fail_msg
2152
    if msg:
2153
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2154
      result = False
2155
    elif not rstats.payload:
2156
      lu.LogWarning("Can't find disk on node %s", node)
2157
      result = False
2158
    else:
2159
      if ldisk:
2160
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2161
      else:
2162
        result = result and not rstats.payload.is_degraded
2163

    
2164
  if dev.children:
2165
    for child in dev.children:
2166
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2167

    
2168
  return result
2169
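
# Example sketch (hypothetical variable names, not from the original code):
# failover/migration style checks can use the helper above with ldisk=True to
# insist on clean local storage on the target node, not just an overall
# non-degraded DRBD pair:
#
#   for dev in instance.disks:
#     if not _CheckDiskConsistency(self, dev, target_node, False, ldisk=True):
#       raise errors.OpExecError("Disk %s is degraded on target node,"
#                                " aborting failover." % dev.iv_name)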

    
2170

    
2171
class LUDiagnoseOS(NoHooksLU):
2172
  """Logical unit for OS diagnose/query.
2173

2174
  """
2175
  _OP_REQP = ["output_fields", "names"]
2176
  REQ_BGL = False
2177
  _FIELDS_STATIC = utils.FieldSet()
2178
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2179
  # Fields that need calculation of global os validity
2180
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2181

    
2182
  def ExpandNames(self):
2183
    if self.op.names:
2184
      raise errors.OpPrereqError("Selective OS query not supported")
2185

    
2186
    _CheckOutputFields(static=self._FIELDS_STATIC,
2187
                       dynamic=self._FIELDS_DYNAMIC,
2188
                       selected=self.op.output_fields)
2189

    
2190
    # Lock all nodes, in shared mode
2191
    # Temporary removal of locks, should be reverted later
2192
    # TODO: reintroduce locks when they are lighter-weight
2193
    self.needed_locks = {}
2194
    #self.share_locks[locking.LEVEL_NODE] = 1
2195
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2196

    
2197
  def CheckPrereq(self):
2198
    """Check prerequisites.
2199

2200
    """
2201

    
2202
  @staticmethod
2203
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
2205

2206
    @param node_list: a list with the names of all nodes
2207
    @param rlist: a map with node names as keys and OS objects as values
2208

2209
    @rtype: dict
2210
    @return: a dictionary with osnames as keys and as value another map, with
2211
        nodes as keys and tuples of (path, status, diagnose, variants)
        as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }
2217

2218
    """
2219
    all_os = {}
2220
    # we build here the list of nodes that didn't fail the RPC (at RPC
2221
    # level), so that nodes with a non-responding node daemon don't
2222
    # make all OSes invalid
2223
    good_nodes = [node_name for node_name in rlist
2224
                  if not rlist[node_name].fail_msg]
2225
    for node_name, nr in rlist.items():
2226
      if nr.fail_msg or not nr.payload:
2227
        continue
2228
      for name, path, status, diagnose, variants in nr.payload:
2229
        if name not in all_os:
2230
          # build a list of nodes for this os containing empty lists
2231
          # for each node in node_list
2232
          all_os[name] = {}
2233
          for nname in good_nodes:
2234
            all_os[name][nname] = []
2235
        all_os[name][node_name].append((path, status, diagnose, variants))
2236
    return all_os
2237

    
2238
  def Exec(self, feedback_fn):
2239
    """Compute the list of OSes.
2240

2241
    """
2242
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2243
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2244
    pol = self._DiagnoseByOS(valid_nodes, node_data)
2245
    output = []
2246
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2247
    calc_variants = "variants" in self.op.output_fields
2248

    
2249
    for os_name, os_data in pol.items():
2250
      row = []
2251
      if calc_valid:
2252
        valid = True
2253
        variants = None
2254
        for osl in os_data.values():
2255
          valid = valid and osl and osl[0][1]
2256
          if not valid:
2257
            variants = None
2258
            break
2259
          if calc_variants:
2260
            node_variants = osl[0][3]
2261
            if variants is None:
2262
              variants = node_variants
2263
            else:
2264
              variants = [v for v in variants if v in node_variants]
2265

    
2266
      for field in self.op.output_fields:
2267
        if field == "name":
2268
          val = os_name
2269
        elif field == "valid":
2270
          val = valid
2271
        elif field == "node_status":
2272
          # this is just a copy of the dict
2273
          val = {}
2274
          for node_name, nos_list in os_data.items():
2275
            val[node_name] = nos_list
2276
        elif field == "variants":
2277
          val = variants
2278
        else:
2279
          raise errors.ParameterError(field)
2280
        row.append(val)
2281
      output.append(row)
2282

    
2283
    return output
2284
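
# Example sketch (hypothetical OS name, not from the original code): one
# output row produced by LUDiagnoseOS.Exec above for
# output_fields=["name", "valid", "variants"], with an OS that is valid on
# every node and shares a single variant:
#
#   ["debootstrap", True, ["default"]]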

    
2285

    
2286
class LURemoveNode(LogicalUnit):
2287
  """Logical unit for removing a node.
2288

2289
  """
2290
  HPATH = "node-remove"
2291
  HTYPE = constants.HTYPE_NODE
2292
  _OP_REQP = ["node_name"]
2293

    
2294
  def BuildHooksEnv(self):
2295
    """Build hooks env.
2296

2297
    This doesn't run on the target node in the pre phase as a failed
2298
    node would then be impossible to remove.
2299

2300
    """
2301
    env = {
2302
      "OP_TARGET": self.op.node_name,
2303
      "NODE_NAME": self.op.node_name,
2304
      }
2305
    all_nodes = self.cfg.GetNodeList()
2306
    if self.op.node_name in all_nodes:
2307
      all_nodes.remove(self.op.node_name)
2308
    return env, all_nodes, all_nodes
2309

    
2310
  def CheckPrereq(self):
2311
    """Check prerequisites.
2312

2313
    This checks:
2314
     - the node exists in the configuration
2315
     - it does not have primary or secondary instances
2316
     - it's not the master
2317

2318
    Any errors are signaled by raising errors.OpPrereqError.
2319

2320
    """
2321
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2322
    if node is None:
2323
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2324

    
2325
    instance_list = self.cfg.GetInstanceList()
2326

    
2327
    masternode = self.cfg.GetMasterNode()
2328
    if node.name == masternode:
2329
      raise errors.OpPrereqError("Node is the master node,"
2330
                                 " you need to failover first.")
2331

    
2332
    for instance_name in instance_list:
2333
      instance = self.cfg.GetInstanceInfo(instance_name)
2334
      if node.name in instance.all_nodes:
2335
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2336
                                   " please remove first." % instance_name)
2337
    self.op.node_name = node.name
2338
    self.node = node
2339

    
2340
  def Exec(self, feedback_fn):
2341
    """Removes the node from the cluster.
2342

2343
    """
2344
    node = self.node
2345
    logging.info("Stopping the node daemon and removing configs from node %s",
2346
                 node.name)
2347

    
2348
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2349

    
2350
    # Promote nodes to master candidate as needed
2351
    _AdjustCandidatePool(self, exceptions=[node.name])
2352
    self.context.RemoveNode(node.name)
2353

    
2354
    # Run post hooks on the node before it's removed
2355
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2356
    try:
2357
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2358
    except:
2359
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2360

    
2361
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2362
    msg = result.fail_msg
2363
    if msg:
2364
      self.LogWarning("Errors encountered on the remote node while leaving"
2365
                      " the cluster: %s", msg)
2366

    
2367

    
2368
class LUQueryNodes(NoHooksLU):
2369
  """Logical unit for querying nodes.
2370

2371
  """
2372
  _OP_REQP = ["output_fields", "names", "use_locking"]
2373
  REQ_BGL = False
2374

    
2375
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2376
                    "master_candidate", "offline", "drained"]
2377

    
2378
  _FIELDS_DYNAMIC = utils.FieldSet(
2379
    "dtotal", "dfree",
2380
    "mtotal", "mnode", "mfree",
2381
    "bootid",
2382
    "ctotal", "cnodes", "csockets",
2383
    )
2384

    
2385
  _FIELDS_STATIC = utils.FieldSet(*[
2386
    "pinst_cnt", "sinst_cnt",
2387
    "pinst_list", "sinst_list",
2388
    "pip", "sip", "tags",
2389
    "master",
2390
    "role"] + _SIMPLE_FIELDS
2391
    )
2392

    
2393
  def ExpandNames(self):
2394
    _CheckOutputFields(static=self._FIELDS_STATIC,
2395
                       dynamic=self._FIELDS_DYNAMIC,
2396
                       selected=self.op.output_fields)
2397

    
2398
    self.needed_locks = {}
2399
    self.share_locks[locking.LEVEL_NODE] = 1
2400

    
2401
    if self.op.names:
2402
      self.wanted = _GetWantedNodes(self, self.op.names)
2403
    else:
2404
      self.wanted = locking.ALL_SET
2405

    
2406
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2407
    self.do_locking = self.do_node_query and self.op.use_locking
2408
    if self.do_locking:
2409
      # if we don't request only static fields, we need to lock the nodes
2410
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2411

    
2412
  def CheckPrereq(self):
2413
    """Check prerequisites.
2414

2415
    """
2416
    # The validation of the node list is done in the _GetWantedNodes,
2417
    # if non empty, and if empty, there's no validation to do
2418
    pass
2419

    
2420
  def Exec(self, feedback_fn):
2421
    """Computes the list of nodes and their attributes.
2422

2423
    """
2424
    all_info = self.cfg.GetAllNodesInfo()
2425
    if self.do_locking:
2426
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2427
    elif self.wanted != locking.ALL_SET:
2428
      nodenames = self.wanted
2429
      missing = set(nodenames).difference(all_info.keys())
2430
      if missing:
2431
        raise errors.OpExecError(
2432
          "Some nodes were removed before retrieving their data: %s" % missing)
2433
    else:
2434
      nodenames = all_info.keys()
2435

    
2436
    nodenames = utils.NiceSort(nodenames)
2437
    nodelist = [all_info[name] for name in nodenames]
2438

    
2439
    # begin data gathering
2440

    
2441
    if self.do_node_query:
2442
      live_data = {}
2443
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2444
                                          self.cfg.GetHypervisorType())
2445
      for name in nodenames:
2446
        nodeinfo = node_data[name]
2447
        if not nodeinfo.fail_msg and nodeinfo.payload:
2448
          nodeinfo = nodeinfo.payload
2449
          fn = utils.TryConvert
2450
          live_data[name] = {
2451
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2452
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2453
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2454
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2455
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2456
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2457
            "bootid": nodeinfo.get('bootid', None),
2458
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2459
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2460
            }
2461
        else:
2462
          live_data[name] = {}
2463
    else:
2464
      live_data = dict.fromkeys(nodenames, {})
2465

    
2466
    node_to_primary = dict([(name, set()) for name in nodenames])
2467
    node_to_secondary = dict([(name, set()) for name in nodenames])
2468

    
2469
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2470
                             "sinst_cnt", "sinst_list"))
2471
    if inst_fields & frozenset(self.op.output_fields):
2472
      instancelist = self.cfg.GetInstanceList()
2473

    
2474
      for instance_name in instancelist:
2475
        inst = self.cfg.GetInstanceInfo(instance_name)
2476
        if inst.primary_node in node_to_primary:
2477
          node_to_primary[inst.primary_node].add(inst.name)
2478
        for secnode in inst.secondary_nodes:
2479
          if secnode in node_to_secondary:
2480
            node_to_secondary[secnode].add(inst.name)
2481

    
2482
    master_node = self.cfg.GetMasterNode()
2483

    
2484
    # end data gathering
2485

    
2486
    output = []
2487
    for node in nodelist:
2488
      node_output = []
2489
      for field in self.op.output_fields:
2490
        if field in self._SIMPLE_FIELDS:
2491
          val = getattr(node, field)
2492
        elif field == "pinst_list":
2493
          val = list(node_to_primary[node.name])
2494
        elif field == "sinst_list":
2495
          val = list(node_to_secondary[node.name])
2496
        elif field == "pinst_cnt":
2497
          val = len(node_to_primary[node.name])
2498
        elif field == "sinst_cnt":
2499
          val = len(node_to_secondary[node.name])
2500
        elif field == "pip":
2501
          val = node.primary_ip
2502
        elif field == "sip":
2503
          val = node.secondary_ip
2504
        elif field == "tags":
2505
          val = list(node.GetTags())
2506
        elif field == "master":
2507
          val = node.name == master_node
2508
        elif self._FIELDS_DYNAMIC.Matches(field):
2509
          val = live_data[node.name].get(field, None)
2510
        elif field == "role":
2511
          if node.name == master_node:
2512
            val = "M"
2513
          elif node.master_candidate:
2514
            val = "C"
2515
          elif node.drained:
2516
            val = "D"
2517
          elif node.offline:
2518
            val = "O"
2519
          else:
2520
            val = "R"
2521
        else:
2522
          raise errors.ParameterError(field)
2523
        node_output.append(val)
2524
      output.append(node_output)
2525

    
2526
    return output
2527

    
2528

    
2529
class LUQueryNodeVolumes(NoHooksLU):
2530
  """Logical unit for getting volumes on node(s).
2531

2532
  """
2533
  _OP_REQP = ["nodes", "output_fields"]
2534
  REQ_BGL = False
2535
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2536
  _FIELDS_STATIC = utils.FieldSet("node")
2537

    
2538
  def ExpandNames(self):
2539
    _CheckOutputFields(static=self._FIELDS_STATIC,
2540
                       dynamic=self._FIELDS_DYNAMIC,
2541
                       selected=self.op.output_fields)
2542

    
2543
    self.needed_locks = {}
2544
    self.share_locks[locking.LEVEL_NODE] = 1
2545
    if not self.op.nodes:
2546
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2547
    else:
2548
      self.needed_locks[locking.LEVEL_NODE] = \
2549
        _GetWantedNodes(self, self.op.nodes)
2550

    
2551
  def CheckPrereq(self):
2552
    """Check prerequisites.
2553

2554
    This checks that the fields required are valid output fields.
2555

2556
    """
2557
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2558

    
2559
  def Exec(self, feedback_fn):
2560
    """Computes the list of nodes and their attributes.
2561

2562
    """
2563
    nodenames = self.nodes
2564
    volumes = self.rpc.call_node_volumes(nodenames)
2565

    
2566
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2567
             in self.cfg.GetInstanceList()]
2568

    
2569
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2570

    
2571
    output = []
2572
    for node in nodenames:
2573
      nresult = volumes[node]
2574
      if nresult.offline:
2575
        continue
2576
      msg = nresult.fail_msg
2577
      if msg:
2578
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2579
        continue
2580

    
2581
      node_vols = nresult.payload[:]
2582
      node_vols.sort(key=lambda vol: vol['dev'])
2583

    
2584
      for vol in node_vols:
2585
        node_output = []
2586
        for field in self.op.output_fields:
2587
          if field == "node":
2588
            val = node
2589
          elif field == "phys":
2590
            val = vol['dev']
2591
          elif field == "vg":
2592
            val = vol['vg']
2593
          elif field == "name":
2594
            val = vol['name']
2595
          elif field == "size":
2596
            val = int(float(vol['size']))
2597
          elif field == "instance":
2598
            for inst in ilist:
2599
              if node not in lv_by_node[inst]:
2600
                continue
2601
              if vol['name'] in lv_by_node[inst][node]:
2602
                val = inst.name
2603
                break
2604
            else:
2605
              val = '-'
2606
          else:
2607
            raise errors.ParameterError(field)
2608
          node_output.append(str(val))
2609

    
2610
        output.append(node_output)
2611

    
2612
    return output
2613

    
2614

    
2615
class LUQueryNodeStorage(NoHooksLU):
2616
  """Logical unit for getting information on storage units on node(s).
2617

2618
  """
2619
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2620
  REQ_BGL = False
2621
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2622

    
2623
  def ExpandNames(self):
2624
    storage_type = self.op.storage_type
2625

    
2626
    if storage_type not in constants.VALID_STORAGE_TYPES:
2627
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2628

    
2629
    _CheckOutputFields(static=self._FIELDS_STATIC,
2630
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2631
                       selected=self.op.output_fields)
2632

    
2633
    self.needed_locks = {}
2634
    self.share_locks[locking.LEVEL_NODE] = 1
2635

    
2636
    if self.op.nodes:
2637
      self.needed_locks[locking.LEVEL_NODE] = \
2638
        _GetWantedNodes(self, self.op.nodes)
2639
    else:
2640
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2641

    
2642
  def CheckPrereq(self):
2643
    """Check prerequisites.
2644

2645
    This checks that the fields required are valid output fields.
2646

2647
    """
2648
    self.op.name = getattr(self.op, "name", None)
2649

    
2650
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2651

    
2652
  def Exec(self, feedback_fn):
2653
    """Computes the list of nodes and their attributes.
2654

2655
    """
2656
    # Always get name to sort by
2657
    if constants.SF_NAME in self.op.output_fields:
2658
      fields = self.op.output_fields[:]
2659
    else:
2660
      fields = [constants.SF_NAME] + self.op.output_fields
2661

    
2662
    # Never ask for node or type as it's only known to the LU
2663
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
2664
      while extra in fields:
2665
        fields.remove(extra)
2666

    
2667
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2668
    name_idx = field_idx[constants.SF_NAME]
2669

    
2670
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2671
    data = self.rpc.call_storage_list(self.nodes,
2672
                                      self.op.storage_type, st_args,
2673
                                      self.op.name, fields)
2674

    
2675
    result = []
2676

    
2677
    for node in utils.NiceSort(self.nodes):
2678
      nresult = data[node]
2679
      if nresult.offline:
2680
        continue
2681

    
2682
      msg = nresult.fail_msg
2683
      if msg:
2684
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2685
        continue
2686

    
2687
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2688

    
2689
      for name in utils.NiceSort(rows.keys()):
2690
        row = rows[name]
2691

    
2692
        out = []
2693

    
2694
        for field in self.op.output_fields:
2695
          if field == constants.SF_NODE:
2696
            val = node
2697
          elif field == constants.SF_TYPE:
2698
            val = self.op.storage_type
2699
          elif field in field_idx:
2700
            val = row[field_idx[field]]
2701
          else:
2702
            raise errors.ParameterError(field)
2703

    
2704
          out.append(val)
2705

    
2706
        result.append(out)
2707

    
2708
    return result
2709

    
2710

    
2711
class LUModifyNodeStorage(NoHooksLU):
2712
  """Logical unit for modifying a storage volume on a node.
2713

2714
  """
2715
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2716
  REQ_BGL = False
2717

    
2718
  def CheckArguments(self):
2719
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2720
    if node_name is None:
2721
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2722

    
2723
    self.op.node_name = node_name
2724

    
2725
    storage_type = self.op.storage_type
2726
    if storage_type not in constants.VALID_STORAGE_TYPES:
2727
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2728

    
2729
  def ExpandNames(self):
2730
    self.needed_locks = {
2731
      locking.LEVEL_NODE: self.op.node_name,
2732
      }
2733

    
2734
  def CheckPrereq(self):
2735
    """Check prerequisites.
2736

2737
    """
2738
    storage_type = self.op.storage_type
2739

    
2740
    try:
2741
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2742
    except KeyError:
2743
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2744
                                 " modified" % storage_type)
2745

    
2746
    diff = set(self.op.changes.keys()) - modifiable
2747
    if diff:
2748
      raise errors.OpPrereqError("The following fields can not be modified for"
2749
                                 " storage units of type '%s': %r" %
2750
                                 (storage_type, list(diff)))
2751

    
2752
  def Exec(self, feedback_fn):
2753
    """Computes the list of nodes and their attributes.
2754

2755
    """
2756
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2757
    result = self.rpc.call_storage_modify(self.op.node_name,
2758
                                          self.op.storage_type, st_args,
2759
                                          self.op.name, self.op.changes)
2760
    result.Raise("Failed to modify storage unit '%s' on %s" %
2761
                 (self.op.name, self.op.node_name))
2762
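
# Example sketch (assumptions, not from the original code): the "changes"
# dict may only touch fields listed in constants.MODIFIABLE_STORAGE_FIELDS
# for the given storage type; assuming LVM physical volumes expose an
# "allocatable" flag there (constants.SF_ALLOCATABLE), a valid opcode input
# would be roughly:
#
#   node_name    = "node1.example.com"
#   storage_type = constants.ST_LVM_PV
#   name         = "/dev/sdb1"
#   changes      = {constants.SF_ALLOCATABLE: False}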

    
2763

    
2764
class LUAddNode(LogicalUnit):
2765
  """Logical unit for adding node to the cluster.
2766

2767
  """
2768
  HPATH = "node-add"
2769
  HTYPE = constants.HTYPE_NODE
2770
  _OP_REQP = ["node_name"]
2771

    
2772
  def BuildHooksEnv(self):
2773
    """Build hooks env.
2774

2775
    This will run on all nodes before, and on all nodes + the new node after.
2776

2777
    """
2778
    env = {
2779
      "OP_TARGET": self.op.node_name,
2780
      "NODE_NAME": self.op.node_name,
2781
      "NODE_PIP": self.op.primary_ip,
2782
      "NODE_SIP": self.op.secondary_ip,
2783
      }
2784
    nodes_0 = self.cfg.GetNodeList()
2785
    nodes_1 = nodes_0 + [self.op.node_name, ]
2786
    return env, nodes_0, nodes_1
2787

    
2788
  def CheckPrereq(self):
2789
    """Check prerequisites.
2790

2791
    This checks:
2792
     - the new node is not already in the config
2793
     - it is resolvable
2794
     - its parameters (single/dual homed) matches the cluster
2795

2796
    Any errors are signaled by raising errors.OpPrereqError.
2797

2798
    """
2799
    node_name = self.op.node_name
2800
    cfg = self.cfg
2801

    
2802
    dns_data = utils.HostInfo(node_name)
2803

    
2804
    node = dns_data.name
2805
    primary_ip = self.op.primary_ip = dns_data.ip
2806
    secondary_ip = getattr(self.op, "secondary_ip", None)
2807
    if secondary_ip is None:
2808
      secondary_ip = primary_ip
2809
    if not utils.IsValidIP(secondary_ip):
2810
      raise errors.OpPrereqError("Invalid secondary IP given")
2811
    self.op.secondary_ip = secondary_ip
2812

    
2813
    node_list = cfg.GetNodeList()
2814
    if not self.op.readd and node in node_list:
2815
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2816
                                 node)
2817
    elif self.op.readd and node not in node_list:
2818
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2819

    
2820
    for existing_node_name in node_list:
2821
      existing_node = cfg.GetNodeInfo(existing_node_name)
2822

    
2823
      if self.op.readd and node == existing_node_name:
2824
        if (existing_node.primary_ip != primary_ip or
2825
            existing_node.secondary_ip != secondary_ip):
2826
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2827
                                     " address configuration as before")
2828
        continue
2829

    
2830
      if (existing_node.primary_ip == primary_ip or
2831
          existing_node.secondary_ip == primary_ip or
2832
          existing_node.primary_ip == secondary_ip or
2833
          existing_node.secondary_ip == secondary_ip):
2834
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2835
                                   " existing node %s" % existing_node.name)
2836

    
2837
    # check that the type of the node (single versus dual homed) is the
2838
    # same as for the master
2839
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2840
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2841
    newbie_singlehomed = secondary_ip == primary_ip
2842
    if master_singlehomed != newbie_singlehomed:
2843
      if master_singlehomed:
2844
        raise errors.OpPrereqError("The master has no private ip but the"
2845
                                   " new node has one")
2846
      else:
2847
        raise errors.OpPrereqError("The master has a private ip but the"
2848
                                   " new node doesn't have one")
2849

    
2850
    # checks reachability
2851
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2852
      raise errors.OpPrereqError("Node not reachable by ping")
2853

    
2854
    if not newbie_singlehomed:
2855
      # check reachability from my secondary ip to newbie's secondary ip
2856
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2857
                           source=myself.secondary_ip):
2858
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2859
                                   " based ping to noded port")
2860

    
2861
    if self.op.readd:
2862
      exceptions = [node]
2863
    else:
2864
      exceptions = []
2865

    
2866
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
2867

    
2868
    if self.op.readd:
2869
      self.new_node = self.cfg.GetNodeInfo(node)
2870
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2871
    else:
2872
      self.new_node = objects.Node(name=node,
2873
                                   primary_ip=primary_ip,
2874
                                   secondary_ip=secondary_ip,
2875
                                   master_candidate=self.master_candidate,
2876
                                   offline=False, drained=False)
2877

    
2878
  def Exec(self, feedback_fn):
2879
    """Adds the new node to the cluster.
2880

2881
    """
2882
    new_node = self.new_node
2883
    node = new_node.name
2884

    
2885
    # for re-adds, reset the offline/drained/master-candidate flags;
2886
    # we need to reset here, otherwise offline would prevent RPC calls
2887
    # later in the procedure; this also means that if the re-add
2888
    # fails, we are left with a non-offlined, broken node
2889
    if self.op.readd:
2890
      new_node.drained = new_node.offline = False
2891
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2892
      # if we demote the node, we do cleanup later in the procedure
2893
      new_node.master_candidate = self.master_candidate
2894

    
2895
    # notify the user about any possible mc promotion
2896
    if new_node.master_candidate:
2897
      self.LogInfo("Node will be a master candidate")
2898

    
2899
    # check connectivity
2900
    result = self.rpc.call_version([node])[node]
2901
    result.Raise("Can't get version information from node %s" % node)
2902
    if constants.PROTOCOL_VERSION == result.payload:
2903
      logging.info("Communication to node %s fine, sw version %s match",
2904
                   node, result.payload)
2905
    else:
2906
      raise errors.OpExecError("Version mismatch master version %s,"
2907
                               " node version %s" %
2908
                               (constants.PROTOCOL_VERSION, result.payload))
2909

    
2910
    # setup ssh on node
2911
    if self.cfg.GetClusterInfo().modify_ssh_setup:
2912
      logging.info("Copy ssh key to node %s", node)
2913
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2914
      keyarray = []
2915
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2916
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2917
                  priv_key, pub_key]
2918

    
2919
      for i in keyfiles:
2920
        keyarray.append(utils.ReadFile(i))
2921

    
2922
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2923
                                      keyarray[2], keyarray[3], keyarray[4],
2924
                                      keyarray[5])
2925
      result.Raise("Cannot transfer ssh keys to the new node")
2926

    
2927
    # Add node to our /etc/hosts, and add key to known_hosts
2928
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2929
      utils.AddHostToEtcHosts(new_node.name)
2930

    
2931
    if new_node.secondary_ip != new_node.primary_ip:
2932
      result = self.rpc.call_node_has_ip_address(new_node.name,
2933
                                                 new_node.secondary_ip)
2934
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2935
                   prereq=True)
2936
      if not result.payload:
2937
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2938
                                 " you gave (%s). Please fix and re-run this"
2939
                                 " command." % new_node.secondary_ip)
2940

    
2941
    node_verify_list = [self.cfg.GetMasterNode()]
2942
    node_verify_param = {
2943
      constants.NV_NODELIST: [node],
2944
      # TODO: do a node-net-test as well?
2945
    }
2946

    
2947
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2948
                                       self.cfg.GetClusterName())
2949
    for verifier in node_verify_list:
2950
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2951
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
2952
      if nl_payload:
2953
        for failed in nl_payload:
2954
          feedback_fn("ssh/hostname verification failed"
2955
                      " (checking from %s): %s" %
2956
                      (verifier, nl_payload[failed]))
2957
        raise errors.OpExecError("ssh/hostname verification failed.")
2958

    
2959
    if self.op.readd:
2960
      _RedistributeAncillaryFiles(self)
2961
      self.context.ReaddNode(new_node)
2962
      # make sure we redistribute the config
2963
      self.cfg.Update(new_node, feedback_fn)
2964
      # and make sure the new node will not have old files around
2965
      if not new_node.master_candidate:
2966
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2967
        msg = result.fail_msg
2968
        if msg:
2969
          self.LogWarning("Node failed to demote itself from master"
2970
                          " candidate status: %s" % msg)
2971
    else:
2972
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2973
      self.context.AddNode(new_node)
2974

    
2975

    
2976
class LUSetNodeParams(LogicalUnit):
2977
  """Modifies the parameters of a node.
2978

2979
  """
2980
  HPATH = "node-modify"
2981
  HTYPE = constants.HTYPE_NODE
2982
  _OP_REQP = ["node_name"]
2983
  REQ_BGL = False
2984

    
2985
  def CheckArguments(self):
2986
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2987
    if node_name is None:
2988
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2989
    self.op.node_name = node_name
2990
    _CheckBooleanOpField(self.op, 'master_candidate')
2991
    _CheckBooleanOpField(self.op, 'offline')
2992
    _CheckBooleanOpField(self.op, 'drained')
2993
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2994
    if all_mods.count(None) == 3:
2995
      raise errors.OpPrereqError("Please pass at least one modification")
2996
    if all_mods.count(True) > 1:
2997
      raise errors.OpPrereqError("Can't set the node into more than one"
2998
                                 " state at the same time")
2999

    
3000
  def ExpandNames(self):
3001
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3002

    
3003
  def BuildHooksEnv(self):
3004
    """Build hooks env.
3005

3006
    This runs on the master node.
3007

3008
    """
3009
    env = {
3010
      "OP_TARGET": self.op.node_name,
3011
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3012
      "OFFLINE": str(self.op.offline),
3013
      "DRAINED": str(self.op.drained),
3014
      }
3015
    nl = [self.cfg.GetMasterNode(),
3016
          self.op.node_name]
3017
    return env, nl, nl
3018

    
3019
  def CheckPrereq(self):
3020
    """Check prerequisites.
3021

3022
    This only checks the instance list against the existing names.
3023

3024
    """
3025
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3026

    
3027
    if (self.op.master_candidate is not None or
3028
        self.op.drained is not None or
3029
        self.op.offline is not None):
3030
      # we can't change the master's node flags
3031
      if self.op.node_name == self.cfg.GetMasterNode():
3032
        raise errors.OpPrereqError("The master role can be changed"
3033
                                   " only via masterfailover")
3034

    
3035
    # Boolean value that tells us whether we're offlining or draining the node
3036
    offline_or_drain = self.op.offline == True or self.op.drained == True
3037
    deoffline_or_drain = self.op.offline == False or self.op.drained == False
3038

    
3039
    if (node.master_candidate and
3040
        (self.op.master_candidate == False or offline_or_drain)):
3041
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
3042
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
3043
      if mc_now <= cp_size:
3044
        msg = ("Not enough master candidates (desired"
3045
               " %d, new value will be %d)" % (cp_size, mc_now-1))
3046
        # Only allow forcing the operation if it's an offline/drain operation,
3047
        # and we could not possibly promote more nodes.
3048
        # FIXME: this can still lead to issues if in any way another node which
3049
        # could be promoted appears in the meantime.
3050
        if self.op.force and offline_or_drain and mc_should == mc_max:
3051
          self.LogWarning(msg)
3052
        else:
3053
          raise errors.OpPrereqError(msg)
3054

    
3055
    if (self.op.master_candidate == True and
3056
        ((node.offline and not self.op.offline == False) or
3057
         (node.drained and not self.op.drained == False))):
3058
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3059
                                 " to master_candidate" % node.name)
3060

    
3061
    # If we're being deofflined/drained, we'll MC ourself if needed
3062
    if (deoffline_or_drain and not offline_or_drain and not
3063
        self.op.master_candidate == True):
3064
      self.op.master_candidate = _DecideSelfPromotion(self)
3065
      if self.op.master_candidate:
3066
        self.LogInfo("Autopromoting node to master candidate")
3067

    
3068
    return
3069

    
3070
  def Exec(self, feedback_fn):
3071
    """Modifies a node.
3072

3073
    """
3074
    node = self.node
3075

    
3076
    result = []
3077
    changed_mc = False
3078

    
3079
    if self.op.offline is not None:
3080
      node.offline = self.op.offline
3081
      result.append(("offline", str(self.op.offline)))
3082
      if self.op.offline == True:
3083
        if node.master_candidate:
3084
          node.master_candidate = False
3085
          changed_mc = True
3086
          result.append(("master_candidate", "auto-demotion due to offline"))
3087
        if node.drained:
3088
          node.drained = False
3089
          result.append(("drained", "clear drained status due to offline"))
3090

    
3091
    if self.op.master_candidate is not None:
3092
      node.master_candidate = self.op.master_candidate
3093
      changed_mc = True
3094
      result.append(("master_candidate", str(self.op.master_candidate)))
3095
      if self.op.master_candidate == False:
3096
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3097
        msg = rrc.fail_msg
3098
        if msg:
3099
          self.LogWarning("Node failed to demote itself: %s" % msg)
3100

    
3101
    if self.op.drained is not None:
3102
      node.drained = self.op.drained
3103
      result.append(("drained", str(self.op.drained)))
3104
      if self.op.drained == True:
3105
        if node.master_candidate:
3106
          node.master_candidate = False
3107
          changed_mc = True
3108
          result.append(("master_candidate", "auto-demotion due to drain"))
3109
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3110
          msg = rrc.fail_msg
3111
          if msg:
3112
            self.LogWarning("Node failed to demote itself: %s" % msg)
3113
        if node.offline:
3114
          node.offline = False
3115
          result.append(("offline", "clear offline status due to drain"))
3116

    
3117
    # this will trigger configuration file update, if needed
3118
    self.cfg.Update(node, feedback_fn)
3119
    # this will trigger job queue propagation or cleanup
3120
    if changed_mc:
3121
      self.context.ReaddNode(node)
3122

    
3123
    return result
3124

    
3125

    
3126
class LUPowercycleNode(NoHooksLU):
3127
  """Powercycles a node.
3128

3129
  """
3130
  _OP_REQP = ["node_name", "force"]
3131
  REQ_BGL = False
3132

    
3133
  def CheckArguments(self):
3134
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
3135
    if node_name is None:
3136
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
3137
    self.op.node_name = node_name
3138
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
3139
      raise errors.OpPrereqError("The node is the master and the force"
3140
                                 " parameter was not set")
3141

    
3142
  def ExpandNames(self):
3143
    """Locking for PowercycleNode.
3144

3145
    This is a last-resort option and shouldn't block on other
3146
    jobs. Therefore, we grab no locks.
3147

3148
    """
3149
    self.needed_locks = {}
3150

    
3151
  def CheckPrereq(self):
3152
    """Check prerequisites.
3153

3154
    This LU has no prereqs.
3155

3156
    """
3157
    pass
3158

    
3159
  def Exec(self, feedback_fn):
3160
    """Reboots a node.
3161

3162
    """
3163
    result = self.rpc.call_node_powercycle(self.op.node_name,
3164
                                           self.cfg.GetHypervisorType())
3165
    result.Raise("Failed to schedule the reboot")
3166
    return result.payload
3167

    
3168

    
3169
class LUQueryClusterInfo(NoHooksLU):
3170
  """Query cluster configuration.
3171

3172
  """
3173
  _OP_REQP = []
3174
  REQ_BGL = False
3175

    
3176
  def ExpandNames(self):
3177
    self.needed_locks = {}
3178

    
3179
  def CheckPrereq(self):
3180
    """No prerequsites needed for this LU.
3181

3182
    """
3183
    pass
3184

    
3185
  def Exec(self, feedback_fn):
3186
    """Return cluster config.
3187

3188
    """
3189
    cluster = self.cfg.GetClusterInfo()
3190
    result = {
3191
      "software_version": constants.RELEASE_VERSION,
3192
      "protocol_version": constants.PROTOCOL_VERSION,
3193
      "config_version": constants.CONFIG_VERSION,
3194
      "os_api_version": max(constants.OS_API_VERSIONS),
3195
      "export_version": constants.EXPORT_VERSION,
3196
      "architecture": (platform.architecture()[0], platform.machine()),
3197
      "name": cluster.cluster_name,
3198
      "master": cluster.master_node,
3199
      "default_hypervisor": cluster.enabled_hypervisors[0],
3200
      "enabled_hypervisors": cluster.enabled_hypervisors,
3201
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3202
                        for hypervisor_name in cluster.enabled_hypervisors]),
3203
      "beparams": cluster.beparams,
3204
      "nicparams": cluster.nicparams,
3205
      "candidate_pool_size": cluster.candidate_pool_size,
3206
      "master_netdev": cluster.master_netdev,
3207
      "volume_group_name": cluster.volume_group_name,
3208
      "file_storage_dir": cluster.file_storage_dir,
3209
      "ctime": cluster.ctime,
3210
      "mtime": cluster.mtime,
3211
      "uuid": cluster.uuid,
3212
      "tags": list(cluster.GetTags()),
3213
      }
3214

    
3215
    return result
3216

    
3217

    
3218
class LUQueryConfigValues(NoHooksLU):
3219
  """Return configuration values.
3220

3221
  """
3222
  _OP_REQP = []
3223
  REQ_BGL = False
3224
  _FIELDS_DYNAMIC = utils.FieldSet()
3225
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3226
                                  "watcher_pause")
3227

    
3228
  def ExpandNames(self):
3229
    self.needed_locks = {}
3230

    
3231
    _CheckOutputFields(static=self._FIELDS_STATIC,
3232
                       dynamic=self._FIELDS_DYNAMIC,
3233
                       selected=self.op.output_fields)
3234

    
3235
  def CheckPrereq(self):
3236
    """No prerequisites.
3237

3238
    """
3239
    pass
3240

    
3241
  def Exec(self, feedback_fn):
3242
    """Dump a representation of the cluster config to the standard output.
3243

3244
    """
3245
    values = []
3246
    for field in self.op.output_fields:
3247
      if field == "cluster_name":
3248
        entry = self.cfg.GetClusterName()
3249
      elif field == "master_node":
3250
        entry = self.cfg.GetMasterNode()
3251
      elif field == "drain_flag":
3252
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3253
      elif field == "watcher_pause":
3254
        return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3255
      else:
3256
        raise errors.ParameterError(field)
3257
      values.append(entry)
3258
    return values
3259

    
3260

    
3261
class LUActivateInstanceDisks(NoHooksLU):
3262
  """Bring up an instance's disks.
3263

3264
  """
3265
  _OP_REQP = ["instance_name"]
3266
  REQ_BGL = False
3267

    
3268
  def ExpandNames(self):
3269
    self._ExpandAndLockInstance()
3270
    self.needed_locks[locking.LEVEL_NODE] = []
3271
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3272

    
3273
  def DeclareLocks(self, level):
3274
    if level == locking.LEVEL_NODE:
3275
      self._LockInstancesNodes()
3276

    
3277
  def CheckPrereq(self):
3278
    """Check prerequisites.
3279

3280
    This checks that the instance is in the cluster.
3281

3282
    """
3283
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3284
    assert self.instance is not None, \
3285
      "Cannot retrieve locked instance %s" % self.op.instance_name
3286
    _CheckNodeOnline(self, self.instance.primary_node)
3287
    if not hasattr(self.op, "ignore_size"):
3288
      self.op.ignore_size = False
3289

    
3290
  def Exec(self, feedback_fn):
3291
    """Activate the disks.
3292

3293
    """
3294
    disks_ok, disks_info = \
3295
              _AssembleInstanceDisks(self, self.instance,
3296
                                     ignore_size=self.op.ignore_size)
3297
    if not disks_ok:
3298
      raise errors.OpExecError("Cannot activate block devices")
3299

    
3300
    return disks_info
3301

    
3302

    
3303
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3304
                           ignore_size=False):
3305
  """Prepare the block devices for an instance.
3306

3307
  This sets up the block devices on all nodes.
3308

3309
  @type lu: L{LogicalUnit}
3310
  @param lu: the logical unit on whose behalf we execute
3311
  @type instance: L{objects.Instance}
3312
  @param instance: the instance for whose disks we assemble
3313
  @type ignore_secondaries: boolean
3314
  @param ignore_secondaries: if true, errors on secondary nodes
3315
      won't result in an error return from the function
3316
  @type ignore_size: boolean
3317
  @param ignore_size: if true, the current known size of the disk
3318
      will not be used during the disk activation, useful for cases
3319
      when the size is wrong
3320
  @return: False if the operation failed, otherwise a list of
3321
      (host, instance_visible_name, node_visible_name)
3322
      with the mapping from node devices to instance devices
3323

3324
  """
3325
  device_info = []
3326
  disks_ok = True
3327
  iname = instance.name
3328
  # With the two passes mechanism we try to reduce the window of
3329
  # opportunity for the race condition of switching DRBD to primary
3330
  # before handshaking occured, but we do not eliminate it
3331

    
3332
  # The proper fix would be to wait (with some limits) until the
3333
  # connection has been made and drbd transitions from WFConnection
3334
  # into any other network-connected state (Connected, SyncTarget,
3335
  # SyncSource, etc.)
3336

    
3337
  # 1st pass, assemble on all nodes in secondary mode
3338
  for inst_disk in instance.disks:
3339
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3340
      if ignore_size:
3341
        node_disk = node_disk.Copy()
3342
        node_disk.UnsetSize()
3343
      lu.cfg.SetDiskID(node_disk, node)
3344
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3345
      msg = result.fail_msg
3346
      if msg:
3347
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3348
                           " (is_primary=False, pass=1): %s",
3349
                           inst_disk.iv_name, node, msg)
3350
        if not ignore_secondaries:
3351
          disks_ok = False
3352

    
3353
  # FIXME: race condition on drbd migration to primary
3354

    
3355
  # 2nd pass, do only the primary node
3356
  for inst_disk in instance.disks:
3357
    dev_path = None
3358

    
3359
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3360
      if node != instance.primary_node:
3361
        continue
3362
      if ignore_size:
3363
        node_disk = node_disk.Copy()
3364
        node_disk.UnsetSize()
3365
      lu.cfg.SetDiskID(node_disk, node)
3366
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3367
      msg = result.fail_msg
3368
      if msg:
3369
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3370
                           " (is_primary=True, pass=2): %s",
3371
                           inst_disk.iv_name, node, msg)
3372
        disks_ok = False
3373
      else:
3374
        dev_path = result.payload
3375

    
3376
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3377

    
3378
  # leave the disks configured for the primary node
3379
  # this is a workaround that would be fixed better by
3380
  # improving the logical/physical id handling
3381
  for disk in instance.disks:
3382
    lu.cfg.SetDiskID(disk, instance.primary_node)
3383

    
3384
  return disks_ok, device_info
3385

    
3386

    
3387
def _StartInstanceDisks(lu, instance, force):
3388
  """Start the disks of an instance.
3389

3390
  """
3391
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3392
                                           ignore_secondaries=force)
3393
  if not disks_ok:
3394
    _ShutdownInstanceDisks(lu, instance)
3395
    if force is not None and not force:
3396
      lu.proc.LogWarning("", hint="If the message above refers to a"
3397
                         " secondary node,"
3398
                         " you can retry the operation using '--force'.")
3399
    raise errors.OpExecError("Disk consistency error")
3400

    
3401

    
3402
class LUDeactivateInstanceDisks(NoHooksLU):
3403
  """Shutdown an instance's disks.
3404

3405
  """
3406
  _OP_REQP = ["instance_name"]
3407
  REQ_BGL = False
3408

    
3409
  def ExpandNames(self):
3410
    self._ExpandAndLockInstance()
3411
    self.needed_locks[locking.LEVEL_NODE] = []
3412
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3413

    
3414
  def DeclareLocks(self, level):
3415
    if level == locking.LEVEL_NODE:
3416
      self._LockInstancesNodes()
3417

    
3418
  def CheckPrereq(self):
3419
    """Check prerequisites.
3420

3421
    This checks that the instance is in the cluster.
3422

3423
    """
3424
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3425
    assert self.instance is not None, \
3426
      "Cannot retrieve locked instance %s" % self.op.instance_name
3427

    
3428
  def Exec(self, feedback_fn):
3429
    """Deactivate the disks
3430

3431
    """
3432
    instance = self.instance
3433
    _SafeShutdownInstanceDisks(self, instance)
3434

    
3435

    
3436
def _SafeShutdownInstanceDisks(lu, instance):
3437
  """Shutdown block devices of an instance.
3438

3439
  This function checks if an instance is running, before calling
3440
  _ShutdownInstanceDisks.
3441

3442
  """
3443
  pnode = instance.primary_node
3444
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3445
  ins_l.Raise("Can't contact node %s" % pnode)
3446

    
3447
  if instance.name in ins_l.payload:
3448
    raise errors.OpExecError("Instance is running, can't shutdown"
3449
                             " block devices.")
3450

    
3451
  _ShutdownInstanceDisks(lu, instance)
3452

    
3453

    
3454
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3455
  """Shutdown block devices of an instance.
3456

3457
  This does the shutdown on all nodes of the instance.
3458

3459
  If the ignore_primary is false, errors on the primary node are
3460
  ignored.
3461

3462
  """
3463
  all_result = True
3464
  for disk in instance.disks:
3465
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3466
      lu.cfg.SetDiskID(top_disk, node)
3467
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3468
      msg = result.fail_msg
3469
      if msg:
3470
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3471
                      disk.iv_name, node, msg)
3472
        if not ignore_primary or node != instance.primary_node:
3473
          all_result = False
3474
  return all_result
3475

    
3476

    
3477
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3478
  """Checks if a node has enough free memory.
3479

3480
  This function check if a given node has the needed amount of free
3481
  memory. In case the node has less memory or we cannot get the
3482
  information from the node, this function raise an OpPrereqError
3483
  exception.
3484

3485
  @type lu: C{LogicalUnit}
3486
  @param lu: a logical unit from which we get configuration data
3487
  @type node: C{str}
3488
  @param node: the node to check
3489
  @type reason: C{str}
3490
  @param reason: string to use in the error message
3491
  @type requested: C{int}
3492
  @param requested: the amount of memory in MiB to check for
3493
  @type hypervisor_name: C{str}
3494
  @param hypervisor_name: the hypervisor to ask for memory stats
3495
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3496
      we cannot check the node
3497

3498
  """
3499
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3500
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3501
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3502
  if not isinstance(free_mem, int):
3503
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3504
                               " was '%s'" % (node, free_mem))
3505
  if requested > free_mem:
3506
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3507
                               " needed %s MiB, available %s MiB" %
3508
                               (node, reason, requested, free_mem))
3509

    
3510

    
3511
class LUStartupInstance(LogicalUnit):
3512
  """Starts an instance.
3513

3514
  """
3515
  HPATH = "instance-start"
3516
  HTYPE = constants.HTYPE_INSTANCE
3517
  _OP_REQP = ["instance_name", "force"]
3518
  REQ_BGL = False
3519

    
3520
  def ExpandNames(self):
3521
    self._ExpandAndLockInstance()
3522

    
3523
  def BuildHooksEnv(self):
3524
    """Build hooks env.
3525

3526
    This runs on master, primary and secondary nodes of the instance.
3527

3528
    """
3529
    env = {
3530
      "FORCE": self.op.force,
3531
      }
3532
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3533
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3534
    return env, nl, nl
3535

    
3536
  def CheckPrereq(self):
3537
    """Check prerequisites.
3538

3539
    This checks that the instance is in the cluster.
3540

3541
    """
3542
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3543
    assert self.instance is not None, \
3544
      "Cannot retrieve locked instance %s" % self.op.instance_name
3545

    
3546
    # extra beparams
3547
    self.beparams = getattr(self.op, "beparams", {})
3548
    if self.beparams:
3549
      if not isinstance(self.beparams, dict):
3550
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3551
                                   " dict" % (type(self.beparams), ))
3552
      # fill the beparams dict
3553
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3554
      self.op.beparams = self.beparams
3555

    
3556
    # extra hvparams
3557
    self.hvparams = getattr(self.op, "hvparams", {})
3558
    if self.hvparams:
3559
      if not isinstance(self.hvparams, dict):
3560
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3561
                                   " dict" % (type(self.hvparams), ))
3562

    
3563
      # check hypervisor parameter syntax (locally)
3564
      cluster = self.cfg.GetClusterInfo()
3565
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3566
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3567
                                    instance.hvparams)
3568
      filled_hvp.update(self.hvparams)
3569
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3570
      hv_type.CheckParameterSyntax(filled_hvp)
3571
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3572
      self.op.hvparams = self.hvparams
3573

    
3574
    _CheckNodeOnline(self, instance.primary_node)
3575

    
3576
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3577
    # check bridges existence
3578
    _CheckInstanceBridgesExist(self, instance)
3579

    
3580
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3581
                                              instance.name,
3582
                                              instance.hypervisor)
3583
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3584
                      prereq=True)
3585
    if not remote_info.payload: # not running already
3586
      _CheckNodeFreeMemory(self, instance.primary_node,
3587
                           "starting instance %s" % instance.name,
3588
                           bep[constants.BE_MEMORY], instance.hypervisor)
3589

    
3590
  def Exec(self, feedback_fn):
3591
    """Start the instance.
3592

3593
    """
3594
    instance = self.instance
3595
    force = self.op.force
3596

    
3597
    self.cfg.MarkInstanceUp(instance.name)
3598

    
3599
    node_current = instance.primary_node
3600

    
3601
    _StartInstanceDisks(self, instance, force)
3602

    
3603
    result = self.rpc.call_instance_start(node_current, instance,
3604
                                          self.hvparams, self.beparams)
3605
    msg = result.fail_msg
3606
    if msg:
3607
      _ShutdownInstanceDisks(self, instance)
3608
      raise errors.OpExecError("Could not start instance: %s" % msg)
3609

    
3610

    
3611
class LURebootInstance(LogicalUnit):
3612
  """Reboot an instance.
3613

3614
  """
3615
  HPATH = "instance-reboot"
3616
  HTYPE = constants.HTYPE_INSTANCE
3617
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3618
  REQ_BGL = False
3619

    
3620
  def CheckArguments(self):
3621
    """Check the arguments.
3622

3623
    """
3624
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3625
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
3626

    
3627
  def ExpandNames(self):
3628
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3629
                                   constants.INSTANCE_REBOOT_HARD,
3630
                                   constants.INSTANCE_REBOOT_FULL]:
3631
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3632
                                  (constants.INSTANCE_REBOOT_SOFT,
3633
                                   constants.INSTANCE_REBOOT_HARD,
3634
                                   constants.INSTANCE_REBOOT_FULL))
3635
    self._ExpandAndLockInstance()
3636

    
3637
  def BuildHooksEnv(self):
3638
    """Build hooks env.
3639

3640
    This runs on master, primary and secondary nodes of the instance.
3641

3642
    """
3643
    env = {
3644
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3645
      "REBOOT_TYPE": self.op.reboot_type,
3646
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3647
      }
3648
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3649
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3650
    return env, nl, nl
3651

    
3652
  def CheckPrereq(self):
3653
    """Check prerequisites.
3654

3655
    This checks that the instance is in the cluster.
3656

3657
    """
3658
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3659
    assert self.instance is not None, \
3660
      "Cannot retrieve locked instance %s" % self.op.instance_name
3661

    
3662
    _CheckNodeOnline(self, instance.primary_node)
3663

    
3664
    # check bridges existence
3665
    _CheckInstanceBridgesExist(self, instance)
3666

    
3667
  def Exec(self, feedback_fn):
3668
    """Reboot the instance.
3669

3670
    """
3671
    instance = self.instance
3672
    ignore_secondaries = self.op.ignore_secondaries
3673
    reboot_type = self.op.reboot_type
3674

    
3675
    node_current = instance.primary_node
3676

    
3677
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3678
                       constants.INSTANCE_REBOOT_HARD]:
3679
      for disk in instance.disks:
3680
        self.cfg.SetDiskID(disk, node_current)
3681
      result = self.rpc.call_instance_reboot(node_current, instance,
3682
                                             reboot_type,
3683
                                             self.shutdown_timeout)
3684
      result.Raise("Could not reboot instance")
3685
    else:
3686
      result = self.rpc.call_instance_shutdown(node_current, instance,
3687
                                               self.shutdown_timeout)
3688
      result.Raise("Could not shutdown instance for full reboot")
3689
      _ShutdownInstanceDisks(self, instance)
3690
      _StartInstanceDisks(self, instance, ignore_secondaries)
3691
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3692
      msg = result.fail_msg
3693
      if msg:
3694
        _ShutdownInstanceDisks(self, instance)
3695
        raise errors.OpExecError("Could not start instance for"
3696
                                 " full reboot: %s" % msg)
3697

    
3698
    self.cfg.MarkInstanceUp(instance.name)
3699

    
3700

    
3701
class LUShutdownInstance(LogicalUnit):
3702
  """Shutdown an instance.
3703

3704
  """
3705
  HPATH = "instance-stop"
3706
  HTYPE = constants.HTYPE_INSTANCE
3707
  _OP_REQP = ["instance_name"]
3708
  REQ_BGL = False
3709

    
3710
  def CheckArguments(self):
3711
    """Check the arguments.
3712

3713
    """
3714
    self.timeout = getattr(self.op, "timeout",
3715
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
3716

    
3717
  def ExpandNames(self):
3718
    self._ExpandAndLockInstance()
3719

    
3720
  def BuildHooksEnv(self):
3721
    """Build hooks env.
3722

3723
    This runs on master, primary and secondary nodes of the instance.
3724

3725
    """
3726
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3727
    env["TIMEOUT"] = self.timeout
3728
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3729
    return env, nl, nl
3730

    
3731
  def CheckPrereq(self):
3732
    """Check prerequisites.
3733

3734
    This checks that the instance is in the cluster.
3735

3736
    """
3737
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3738
    assert self.instance is not None, \
3739
      "Cannot retrieve locked instance %s" % self.op.instance_name
3740
    _CheckNodeOnline(self, self.instance.primary_node)
3741

    
3742
  def Exec(self, feedback_fn):
3743
    """Shutdown the instance.
3744

3745
    """
3746
    instance = self.instance
3747
    node_current = instance.primary_node
3748
    timeout = self.timeout
3749
    self.cfg.MarkInstanceDown(instance.name)
3750
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
3751
    msg = result.fail_msg
3752
    if msg:
3753
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3754

    
3755
    _ShutdownInstanceDisks(self, instance)
3756

    
3757

    
3758
class LUReinstallInstance(LogicalUnit):
3759
  """Reinstall an instance.
3760

3761
  """
3762
  HPATH = "instance-reinstall"
3763
  HTYPE = constants.HTYPE_INSTANCE
3764
  _OP_REQP = ["instance_name"]
3765
  REQ_BGL = False
3766

    
3767
  def ExpandNames(self):
3768
    self._ExpandAndLockInstance()
3769

    
3770
  def BuildHooksEnv(self):
3771
    """Build hooks env.
3772

3773
    This runs on master, primary and secondary nodes of the instance.
3774

3775
    """
3776
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3777
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3778
    return env, nl, nl
3779

    
3780
  def CheckPrereq(self):
3781
    """Check prerequisites.
3782

3783
    This checks that the instance is in the cluster and is not running.
3784

3785
    """
3786
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3787
    assert instance is not None, \
3788
      "Cannot retrieve locked instance %s" % self.op.instance_name
3789
    _CheckNodeOnline(self, instance.primary_node)
3790

    
3791
    if instance.disk_template == constants.DT_DISKLESS:
3792
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3793
                                 self.op.instance_name)
3794
    if instance.admin_up:
3795
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3796
                                 self.op.instance_name)
3797
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3798
                                              instance.name,
3799
                                              instance.hypervisor)
3800
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3801
                      prereq=True)
3802
    if remote_info.payload:
3803
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3804
                                 (self.op.instance_name,
3805
                                  instance.primary_node))
3806

    
3807
    self.op.os_type = getattr(self.op, "os_type", None)
3808
    self.op.force_variant = getattr(self.op, "force_variant", False)
3809
    if self.op.os_type is not None:
3810
      # OS verification
3811
      pnode = self.cfg.GetNodeInfo(
3812
        self.cfg.ExpandNodeName(instance.primary_node))
3813
      if pnode is None:
3814
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3815
                                   self.op.pnode)
3816
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3817
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3818
                   (self.op.os_type, pnode.name), prereq=True)
3819
      if not self.op.force_variant:
3820
        _CheckOSVariant(result.payload, self.op.os_type)
3821

    
3822
    self.instance = instance
3823

    
3824
  def Exec(self, feedback_fn):
3825
    """Reinstall the instance.
3826

3827
    """
3828
    inst = self.instance
3829

    
3830
    if self.op.os_type is not None:
3831
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3832
      inst.os = self.op.os_type
3833
      self.cfg.Update(inst, feedback_fn)
3834

    
3835
    _StartInstanceDisks(self, inst, None)
3836
    try:
3837
      feedback_fn("Running the instance OS create scripts...")
3838
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3839
      result.Raise("Could not install OS for instance %s on node %s" %
3840
                   (inst.name, inst.primary_node))
3841
    finally:
3842
      _ShutdownInstanceDisks(self, inst)
3843

    
3844

    
3845
class LURecreateInstanceDisks(LogicalUnit):
3846
  """Recreate an instance's missing disks.
3847

3848
  """
3849
  HPATH = "instance-recreate-disks"
3850
  HTYPE = constants.HTYPE_INSTANCE
3851
  _OP_REQP = ["instance_name", "disks"]
3852
  REQ_BGL = False
3853

    
3854
  def CheckArguments(self):
3855
    """Check the arguments.
3856

3857
    """
3858
    if not isinstance(self.op.disks, list):
3859
      raise errors.OpPrereqError("Invalid disks parameter")
3860
    for item in self.op.disks:
3861
      if (not isinstance(item, int) or
3862
          item < 0):
3863
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3864
                                   str(item))
3865

    
3866
  def ExpandNames(self):
3867
    self._ExpandAndLockInstance()
3868

    
3869
  def BuildHooksEnv(self):
3870
    """Build hooks env.
3871

3872
    This runs on master, primary and secondary nodes of the instance.
3873

3874
    """
3875
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3876
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3877
    return env, nl, nl
3878

    
3879
  def CheckPrereq(self):
3880
    """Check prerequisites.
3881

3882
    This checks that the instance is in the cluster and is not running.
3883

3884
    """
3885
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3886
    assert instance is not None, \
3887
      "Cannot retrieve locked instance %s" % self.op.instance_name
3888
    _CheckNodeOnline(self, instance.primary_node)
3889

    
3890
    if instance.disk_template == constants.DT_DISKLESS:
3891
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3892
                                 self.op.instance_name)
3893
    if instance.admin_up:
3894
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3895
                                 self.op.instance_name)
3896
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3897
                                              instance.name,
3898
                                              instance.hypervisor)
3899
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3900
                      prereq=True)
3901
    if remote_info.payload:
3902
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3903
                                 (self.op.instance_name,
3904
                                  instance.primary_node))
3905

    
3906
    if not self.op.disks:
3907
      self.op.disks = range(len(instance.disks))
3908
    else:
3909
      for idx in self.op.disks:
3910
        if idx >= len(instance.disks):
3911
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
3912

    
3913
    self.instance = instance
3914

    
3915
  def Exec(self, feedback_fn):
3916
    """Recreate the disks.
3917

3918
    """
3919
    to_skip = []
3920
    for idx, disk in enumerate(self.instance.disks):
3921
      if idx not in self.op.disks: # disk idx has not been passed in
3922
        to_skip.append(idx)
3923
        continue
3924

    
3925
    _CreateDisks(self, self.instance, to_skip=to_skip)
3926

    
3927

    
3928
class LURenameInstance(LogicalUnit):
3929
  """Rename an instance.
3930

3931
  """
3932
  HPATH = "instance-rename"
3933
  HTYPE = constants.HTYPE_INSTANCE
3934
  _OP_REQP = ["instance_name", "new_name"]
3935

    
3936
  def BuildHooksEnv(self):
3937
    """Build hooks env.
3938

3939
    This runs on master, primary and secondary nodes of the instance.
3940

3941
    """
3942
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3943
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3944
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3945
    return env, nl, nl
3946

    
3947
  def CheckPrereq(self):
3948
    """Check prerequisites.
3949

3950
    This checks that the instance is in the cluster and is not running.
3951

3952
    """
3953
    instance = self.cfg.GetInstanceInfo(
3954
      self.cfg.ExpandInstanceName(self.op.instance_name))
3955
    if instance is None:
3956
      raise errors.OpPrereqError("Instance '%s' not known" %
3957
                                 self.op.instance_name)
3958
    _CheckNodeOnline(self, instance.primary_node)
3959

    
3960
    if instance.admin_up:
3961
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3962
                                 self.op.instance_name)
3963
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3964
                                              instance.name,
3965
                                              instance.hypervisor)
3966
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3967
                      prereq=True)
3968
    if remote_info.payload:
3969
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3970
                                 (self.op.instance_name,
3971
                                  instance.primary_node))
3972
    self.instance = instance
3973

    
3974
    # new name verification
3975
    name_info = utils.HostInfo(self.op.new_name)
3976

    
3977
    self.op.new_name = new_name = name_info.name
3978
    instance_list = self.cfg.GetInstanceList()
3979
    if new_name in instance_list:
3980
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3981
                                 new_name)
3982

    
3983
    if not getattr(self.op, "ignore_ip", False):
3984
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3985
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3986
                                   (name_info.ip, new_name))
3987

    
3988

    
3989
  def Exec(self, feedback_fn):
3990
    """Reinstall the instance.
3991

3992
    """
3993
    inst = self.instance
3994
    old_name = inst.name
3995

    
3996
    if inst.disk_template == constants.DT_FILE:
3997
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3998

    
3999
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4000
    # Change the instance lock. This is definitely safe while we hold the BGL
4001
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4002
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4003

    
4004
    # re-read the instance from the configuration after rename
4005
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4006

    
4007
    if inst.disk_template == constants.DT_FILE:
4008
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4009
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4010
                                                     old_file_storage_dir,
4011
                                                     new_file_storage_dir)
4012
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4013
                   " (but the instance has been renamed in Ganeti)" %
4014
                   (inst.primary_node, old_file_storage_dir,
4015
                    new_file_storage_dir))
4016

    
4017
    _StartInstanceDisks(self, inst, None)
4018
    try:
4019
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4020
                                                 old_name)
4021
      msg = result.fail_msg
4022
      if msg:
4023
        msg = ("Could not run OS rename script for instance %s on node %s"
4024
               " (but the instance has been renamed in Ganeti): %s" %
4025
               (inst.name, inst.primary_node, msg))
4026
        self.proc.LogWarning(msg)
4027
    finally:
4028
      _ShutdownInstanceDisks(self, inst)
4029

    
4030

    
4031
class LURemoveInstance(LogicalUnit):
4032
  """Remove an instance.
4033

4034
  """
4035
  HPATH = "instance-remove"
4036
  HTYPE = constants.HTYPE_INSTANCE
4037
  _OP_REQP = ["instance_name", "ignore_failures"]
4038
  REQ_BGL = False
4039

    
4040
  def CheckArguments(self):
4041
    """Check the arguments.
4042

4043
    """
4044
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4045
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4046

    
4047
  def ExpandNames(self):
4048
    self._ExpandAndLockInstance()
4049
    self.needed_locks[locking.LEVEL_NODE] = []
4050
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4051

    
4052
  def DeclareLocks(self, level):
4053
    if level == locking.LEVEL_NODE:
4054
      self._LockInstancesNodes()
4055

    
4056
  def BuildHooksEnv(self):
4057
    """Build hooks env.
4058

4059
    This runs on master, primary and secondary nodes of the instance.
4060

4061
    """
4062
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4063
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4064
    nl = [self.cfg.GetMasterNode()]
4065
    return env, nl, nl
4066

    
4067
  def CheckPrereq(self):
4068
    """Check prerequisites.
4069

4070
    This checks that the instance is in the cluster.
4071

4072
    """
4073
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4074
    assert self.instance is not None, \
4075
      "Cannot retrieve locked instance %s" % self.op.instance_name
4076

    
4077
  def Exec(self, feedback_fn):
4078
    """Remove the instance.
4079

4080
    """
4081
    instance = self.instance
4082
    logging.info("Shutting down instance %s on node %s",
4083
                 instance.name, instance.primary_node)
4084

    
4085
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4086
                                             self.shutdown_timeout)
4087
    msg = result.fail_msg
4088
    if msg:
4089
      if self.op.ignore_failures:
4090
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4091
      else:
4092
        raise errors.OpExecError("Could not shutdown instance %s on"
4093
                                 " node %s: %s" %
4094
                                 (instance.name, instance.primary_node, msg))
4095

    
4096
    logging.info("Removing block devices for instance %s", instance.name)
4097

    
4098
    if not _RemoveDisks(self, instance):
4099
      if self.op.ignore_failures:
4100
        feedback_fn("Warning: can't remove instance's disks")
4101
      else:
4102
        raise errors.OpExecError("Can't remove instance's disks")
4103

    
4104
    logging.info("Removing instance %s out of cluster config", instance.name)
4105

    
4106
    self.cfg.RemoveInstance(instance.name)
4107
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4108

    
4109

    
4110
class LUQueryInstances(NoHooksLU):
4111
  """Logical unit for querying instances.
4112

4113
  """
4114
  _OP_REQP = ["output_fields", "names", "use_locking"]
4115
  REQ_BGL = False
4116
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4117
                    "serial_no", "ctime", "mtime", "uuid"]
4118
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4119
                                    "admin_state",
4120
                                    "disk_template", "ip", "mac", "bridge",
4121
                                    "nic_mode", "nic_link",
4122
                                    "sda_size", "sdb_size", "vcpus", "tags",
4123
                                    "network_port", "beparams",
4124
                                    r"(disk)\.(size)/([0-9]+)",
4125
                                    r"(disk)\.(sizes)", "disk_usage",
4126
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4127
                                    r"(nic)\.(bridge)/([0-9]+)",
4128
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4129
                                    r"(disk|nic)\.(count)",
4130
                                    "hvparams",
4131
                                    ] + _SIMPLE_FIELDS +
4132
                                  ["hv/%s" % name
4133
                                   for name in constants.HVS_PARAMETERS] +
4134
                                  ["be/%s" % name
4135
                                   for name in constants.BES_PARAMETERS])
4136
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4137

    
4138

    
4139
  def ExpandNames(self):
4140
    _CheckOutputFields(static=self._FIELDS_STATIC,
4141
                       dynamic=self._FIELDS_DYNAMIC,
4142
                       selected=self.op.output_fields)
4143

    
4144
    self.needed_locks = {}
4145
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4146
    self.share_locks[locking.LEVEL_NODE] = 1
4147

    
4148
    if self.op.names:
4149
      self.wanted = _GetWantedInstances(self, self.op.names)
4150
    else:
4151
      self.wanted = locking.ALL_SET
4152

    
4153
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4154
    self.do_locking = self.do_node_query and self.op.use_locking
4155
    if self.do_locking:
4156
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4157
      self.needed_locks[locking.LEVEL_NODE] = []
4158
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4159

    
4160
  def DeclareLocks(self, level):
4161
    if level == locking.LEVEL_NODE and self.do_locking:
4162
      self._LockInstancesNodes()
4163

    
4164
  def CheckPrereq(self):
4165
    """Check prerequisites.
4166

4167
    """
4168
    pass
4169

    
4170
  def Exec(self, feedback_fn):
4171
    """Computes the list of nodes and their attributes.
4172

4173
    """
4174
    all_info = self.cfg.GetAllInstancesInfo()
4175
    if self.wanted == locking.ALL_SET:
4176
      # caller didn't specify instance names, so ordering is not important
4177
      if self.do_locking:
4178
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4179
      else:
4180
        instance_names = all_info.keys()
4181
      instance_names = utils.NiceSort(instance_names)
4182
    else:
4183
      # caller did specify names, so we must keep the ordering
4184
      if self.do_locking:
4185
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4186
      else:
4187
        tgt_set = all_info.keys()
4188
      missing = set(self.wanted).difference(tgt_set)
4189
      if missing:
4190
        raise errors.OpExecError("Some instances were removed before"
4191
                                 " retrieving their data: %s" % missing)
4192
      instance_names = self.wanted
4193

    
4194
    instance_list = [all_info[iname] for iname in instance_names]
4195

    
4196
    # begin data gathering
4197

    
4198
    nodes = frozenset([inst.primary_node for inst in instance_list])
4199
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4200

    
4201
    bad_nodes = []
4202
    off_nodes = []
4203
    if self.do_node_query:
4204
      live_data = {}
4205
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4206
      for name in nodes:
4207
        result = node_data[name]
4208
        if result.offline:
4209
          # offline nodes will be in both lists
4210
          off_nodes.append(name)
4211
        if result.fail_msg:
4212
          bad_nodes.append(name)
4213
        else:
4214
          if result.payload:
4215
            live_data.update(result.payload)
4216
          # else no instance is alive
4217
    else:
4218
      live_data = dict([(name, {}) for name in instance_names])
4219

    
4220
    # end data gathering
4221

    
4222
    HVPREFIX = "hv/"
4223
    BEPREFIX = "be/"
4224
    output = []
4225
    cluster = self.cfg.GetClusterInfo()
4226
    for instance in instance_list:
4227
      iout = []
4228
      i_hv = cluster.FillHV(instance)
4229
      i_be = cluster.FillBE(instance)
4230
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4231
                                 nic.nicparams) for nic in instance.nics]
4232
      for field in self.op.output_fields:
4233
        st_match = self._FIELDS_STATIC.Matches(field)
4234
        if field in self._SIMPLE_FIELDS:
4235
          val = getattr(instance, field)
4236
        elif field == "pnode":
4237
          val = instance.primary_node
4238
        elif field == "snodes":
4239
          val = list(instance.secondary_nodes)
4240
        elif field == "admin_state":
4241
          val = instance.admin_up
4242
        elif field == "oper_state":
4243
          if instance.primary_node in bad_nodes:
4244
            val = None
4245
          else:
4246
            val = bool(live_data.get(instance.name))
4247
        elif field == "status":
4248
          if instance.primary_node in off_nodes:
4249
            val = "ERROR_nodeoffline"
4250
          elif instance.primary_node in bad_nodes:
4251
            val = "ERROR_nodedown"
4252
          else:
4253
            running = bool(live_data.get(instance.name))
4254
            if running:
4255
              if instance.admin_up:
4256
                val = "running"
4257
              else:
4258
                val = "ERROR_up"
4259
            else:
4260
              if instance.admin_up:
4261
                val = "ERROR_down"
4262
              else:
4263
                val = "ADMIN_down"
4264
        elif field == "oper_ram":
4265
          if instance.primary_node in bad_nodes:
4266
            val = None
4267
          elif instance.name in live_data:
4268
            val = live_data[instance.name].get("memory", "?")
4269
          else:
4270
            val = "-"
4271
        elif field == "vcpus":
4272
          val = i_be[constants.BE_VCPUS]
4273
        elif field == "disk_template":
4274
          val = instance.disk_template
4275
        elif field == "ip":
4276
          if instance.nics:
4277
            val = instance.nics[0].ip
4278
          else:
4279
            val = None
4280
        elif field == "nic_mode":
4281
          if instance.nics:
4282
            val = i_nicp[0][constants.NIC_MODE]
4283
          else:
4284
            val = None
4285
        elif field == "nic_link":
4286
          if instance.nics:
4287
            val = i_nicp[0][constants.NIC_LINK]
4288
          else:
4289
            val = None
4290
        elif field == "bridge":
4291
          if (instance.nics and
4292
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4293
            val = i_nicp[0][constants.NIC_LINK]
4294
          else:
4295
            val = None
4296
        elif field == "mac":
4297
          if instance.nics:
4298
            val = instance.nics[0].mac
4299
          else:
4300
            val = None
4301
        elif field == "sda_size" or field == "sdb_size":
4302
          idx = ord(field[2]) - ord('a')
4303
          try:
4304
            val = instance.FindDisk(idx).size
4305
          except errors.OpPrereqError:
4306
            val = None
4307
        elif field == "disk_usage": # total disk usage per node
4308
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4309
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4310
        elif field == "tags":
4311
          val = list(instance.GetTags())
4312
        elif field == "hvparams":
4313
          val = i_hv
4314
        elif (field.startswith(HVPREFIX) and
4315
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4316
          val = i_hv.get(field[len(HVPREFIX):], None)
4317
        elif field == "beparams":
4318
          val = i_be
4319
        elif (field.startswith(BEPREFIX) and
4320
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4321
          val = i_be.get(field[len(BEPREFIX):], None)
4322
        elif st_match and st_match.groups():
4323
          # matches a variable list
4324
          st_groups = st_match.groups()
4325
          if st_groups and st_groups[0] == "disk":
4326
            if st_groups[1] == "count":
4327
              val = len(instance.disks)
4328
            elif st_groups[1] == "sizes":
4329
              val = [disk.size for disk in instance.disks]
4330
            elif st_groups[1] == "size":
4331
              try:
4332
                val = instance.FindDisk(st_groups[2]).size
4333
              except errors.OpPrereqError:
4334
                val = None
4335
            else:
4336
              assert False, "Unhandled disk parameter"
4337
          elif st_groups[0] == "nic":
4338
            if st_groups[1] == "count":
4339
              val = len(instance.nics)
4340
            elif st_groups[1] == "macs":
4341
              val = [nic.mac for nic in instance.nics]
4342
            elif st_groups[1] == "ips":
4343
              val = [nic.ip for nic in instance.nics]
4344
            elif st_groups[1] == "modes":
4345
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4346
            elif st_groups[1] == "links":
4347
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4348
            elif st_groups[1] == "bridges":
4349
              val = []
4350
              for nicp in i_nicp:
4351
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4352
                  val.append(nicp[constants.NIC_LINK])
4353
                else:
4354
                  val.append(None)
4355
            else:
4356
              # index-based item
4357
              nic_idx = int(st_groups[2])
4358
              if nic_idx >= len(instance.nics):
4359
                val = None
4360
              else:
4361
                if st_groups[1] == "mac":
4362
                  val = instance.nics[nic_idx].mac
4363
                elif st_groups[1] == "ip":
4364
                  val = instance.nics[nic_idx].ip
4365
                elif st_groups[1] == "mode":
4366
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4367
                elif st_groups[1] == "link":
4368
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4369
                elif st_groups[1] == "bridge":
4370
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4371
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4372
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4373
                  else:
4374
                    val = None
4375
                else:
4376
                  assert False, "Unhandled NIC parameter"
4377
          else:
4378
            assert False, ("Declared but unhandled variable parameter '%s'" %
4379
                           field)
4380
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
4389
  """Failover an instance.
4390

4391
  """
4392
  HPATH = "instance-failover"
4393
  HTYPE = constants.HTYPE_INSTANCE
4394
  _OP_REQP = ["instance_name", "ignore_consistency"]
4395
  REQ_BGL = False
4396

    
4397
  def CheckArguments(self):
4398
    """Check the arguments.
4399

4400
    """
4401
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4402
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4403

    
4404
  def ExpandNames(self):
4405
    self._ExpandAndLockInstance()
4406
    self.needed_locks[locking.LEVEL_NODE] = []
4407
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4408

    
4409
  def DeclareLocks(self, level):
4410
    if level == locking.LEVEL_NODE:
4411
      self._LockInstancesNodes()
4412

    
4413
  def BuildHooksEnv(self):
4414
    """Build hooks env.
4415

4416
    This runs on master, primary and secondary nodes of the instance.
4417

4418
    """
4419
    env = {
4420
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4421
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4422
      }
4423
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4424
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4425
    return env, nl, nl
4426

    
4427
  def CheckPrereq(self):
4428
    """Check prerequisites.
4429

4430
    This checks that the instance is in the cluster.
4431

4432
    """
4433
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4434
    assert self.instance is not None, \
4435
      "Cannot retrieve locked instance %s" % self.op.instance_name
4436

    
4437
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4438
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4439
      raise errors.OpPrereqError("Instance's disk layout is not"
4440
                                 " network mirrored, cannot failover.")
4441

    
4442
    secondary_nodes = instance.secondary_nodes
4443
    if not secondary_nodes:
4444
      raise errors.ProgrammerError("no secondary node but using "
4445
                                   "a mirrored disk template")
4446

    
4447
    target_node = secondary_nodes[0]
4448
    _CheckNodeOnline(self, target_node)
4449
    _CheckNodeNotDrained(self, target_node)
4450
    if instance.admin_up:
4451
      # check memory requirements on the secondary node
4452
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4453
                           instance.name, bep[constants.BE_MEMORY],
4454
                           instance.hypervisor)
4455
    else:
4456
      self.LogInfo("Not checking memory on the secondary node as"
4457
                   " instance will not be started")
4458

    
4459
    # check bridge existence
4460
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4461

    
4462
  def Exec(self, feedback_fn):
4463
    """Failover an instance.
4464

4465
    The failover is done by shutting it down on its present node and
4466
    starting it on the secondary.
4467

4468
    """
4469
    instance = self.instance
4470

    
4471
    source_node = instance.primary_node
4472
    target_node = instance.secondary_nodes[0]
4473

    
4474
    if instance.admin_up:
4475
      feedback_fn("* checking disk consistency between source and target")
4476
      for dev in instance.disks:
4477
        # for drbd, these are drbd over lvm
4478
        if not _CheckDiskConsistency(self, dev, target_node, False):
4479
          if not self.op.ignore_consistency:
4480
            raise errors.OpExecError("Disk %s is degraded on target node,"
4481
                                     " aborting failover." % dev.iv_name)
4482
    else:
4483
      feedback_fn("* not checking disk consistency as instance is not running")
4484

    
4485
    feedback_fn("* shutting down instance on source node")
4486
    logging.info("Shutting down instance %s on node %s",
4487
                 instance.name, source_node)
4488

    
4489
    result = self.rpc.call_instance_shutdown(source_node, instance,
4490
                                             self.shutdown_timeout)
4491
    msg = result.fail_msg
4492
    if msg:
4493
      if self.op.ignore_consistency:
4494
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4495
                             " Proceeding anyway. Please make sure node"
4496
                             " %s is down. Error details: %s",
4497
                             instance.name, source_node, source_node, msg)
4498
      else:
4499
        raise errors.OpExecError("Could not shutdown instance %s on"
4500
                                 " node %s: %s" %
4501
                                 (instance.name, source_node, msg))
4502

    
4503
    feedback_fn("* deactivating the instance's disks on source node")
4504
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4505
      raise errors.OpExecError("Can't shut down the instance's disks.")
4506

    
4507
    instance.primary_node = target_node
4508
    # distribute new instance config to the other nodes
4509
    self.cfg.Update(instance, feedback_fn)
4510

    
4511
    # Only start the instance if it's marked as up
4512
    if instance.admin_up:
4513
      feedback_fn("* activating the instance's disks on target node")
4514
      logging.info("Starting instance %s on node %s",
4515
                   instance.name, target_node)
4516

    
4517
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4518
                                               ignore_secondaries=True)
4519
      if not disks_ok:
4520
        _ShutdownInstanceDisks(self, instance)
4521
        raise errors.OpExecError("Can't activate the instance's disks")
4522

    
4523
      feedback_fn("* starting the instance on the target node")
4524
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4525
      msg = result.fail_msg
4526
      if msg:
4527
        _ShutdownInstanceDisks(self, instance)
4528
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4529
                                 (instance.name, target_node, msg))
4530

    
4531

    
4532
class LUMigrateInstance(LogicalUnit):
4533
  """Migrate an instance.
4534

4535
  This is migration without shutting down, compared to the failover,
4536
  which is done with shutdown.
4537

4538
  """
4539
  HPATH = "instance-migrate"
4540
  HTYPE = constants.HTYPE_INSTANCE
4541
  _OP_REQP = ["instance_name", "live", "cleanup"]
4542

    
4543
  REQ_BGL = False
4544

    
4545
  def ExpandNames(self):
4546
    self._ExpandAndLockInstance()
4547

    
4548
    self.needed_locks[locking.LEVEL_NODE] = []
4549
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4550

    
4551
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4552
                                       self.op.live, self.op.cleanup)
4553
    self.tasklets = [self._migrater]
4554

    
4555
  def DeclareLocks(self, level):
4556
    if level == locking.LEVEL_NODE:
4557
      self._LockInstancesNodes()
4558

    
4559
  def BuildHooksEnv(self):
4560
    """Build hooks env.
4561

4562
    This runs on master, primary and secondary nodes of the instance.
4563

4564
    """
4565
    instance = self._migrater.instance
4566
    env = _BuildInstanceHookEnvByObject(self, instance)
4567
    env["MIGRATE_LIVE"] = self.op.live
4568
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4569
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4570
    return env, nl, nl
4571

    
4572

    
4573
class LUMoveInstance(LogicalUnit):
4574
  """Move an instance by data-copying.
4575

4576
  """
4577
  HPATH = "instance-move"
4578
  HTYPE = constants.HTYPE_INSTANCE
4579
  _OP_REQP = ["instance_name", "target_node"]
4580
  REQ_BGL = False
4581

    
4582
  def CheckArguments(self):
4583
    """Check the arguments.
4584

4585
    """
4586
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4587
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4588

    
4589
  def ExpandNames(self):
4590
    self._ExpandAndLockInstance()
4591
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
4592
    if target_node is None:
4593
      raise errors.OpPrereqError("Node '%s' not known" %
4594
                                  self.op.target_node)
4595
    self.op.target_node = target_node
4596
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4597
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4598

    
4599
  def DeclareLocks(self, level):
4600
    if level == locking.LEVEL_NODE:
4601
      self._LockInstancesNodes(primary_only=True)
4602

    
4603
  def BuildHooksEnv(self):
4604
    """Build hooks env.
4605

4606
    This runs on master, primary and secondary nodes of the instance.
4607

4608
    """
4609
    env = {
4610
      "TARGET_NODE": self.op.target_node,
4611
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4612
      }
4613
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4614
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4615
                                       self.op.target_node]
4616
    return env, nl, nl
4617

    
4618
  def CheckPrereq(self):
4619
    """Check prerequisites.
4620

4621
    This checks that the instance is in the cluster.
4622

4623
    """
4624
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4625
    assert self.instance is not None, \
4626
      "Cannot retrieve locked instance %s" % self.op.instance_name
4627

    
4628
    node = self.cfg.GetNodeInfo(self.op.target_node)
4629
    assert node is not None, \
4630
      "Cannot retrieve locked node %s" % self.op.target_node
4631

    
4632
    self.target_node = target_node = node.name
4633

    
4634
    if target_node == instance.primary_node:
4635
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4636
                                 (instance.name, target_node))
4637

    
4638
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4639

    
4640
    for idx, dsk in enumerate(instance.disks):
4641
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4642
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx)
4644

    
4645
    _CheckNodeOnline(self, target_node)
4646
    _CheckNodeNotDrained(self, target_node)
4647

    
4648
    if instance.admin_up:
4649
      # check memory requirements on the secondary node
4650
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4651
                           instance.name, bep[constants.BE_MEMORY],
4652
                           instance.hypervisor)
4653
    else:
4654
      self.LogInfo("Not checking memory on the secondary node as"
4655
                   " instance will not be started")
4656

    
4657
    # check bridge existence
4658
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4659

    
4660
  def Exec(self, feedback_fn):
4661
    """Move an instance.
4662

4663
    The move is done by shutting it down on its present node, copying
4664
    the data over (slow) and starting it on the new node.
4665

4666
    """
4667
    instance = self.instance
4668

    
4669
    source_node = instance.primary_node
4670
    target_node = self.target_node
4671

    
4672
    self.LogInfo("Shutting down instance %s on source node %s",
4673
                 instance.name, source_node)
4674

    
4675
    result = self.rpc.call_instance_shutdown(source_node, instance,
4676
                                             self.shutdown_timeout)
4677
    msg = result.fail_msg
4678
    if msg:
4679
      if self.op.ignore_consistency:
4680
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4681
                             " Proceeding anyway. Please make sure node"
4682
                             " %s is down. Error details: %s",
4683
                             instance.name, source_node, source_node, msg)
4684
      else:
4685
        raise errors.OpExecError("Could not shutdown instance %s on"
4686
                                 " node %s: %s" %
4687
                                 (instance.name, source_node, msg))
4688

    
4689
    # create the target disks
4690
    try:
4691
      _CreateDisks(self, instance, target_node=target_node)
4692
    except errors.OpExecError:
4693
      self.LogWarning("Device creation failed, reverting...")
4694
      try:
4695
        _RemoveDisks(self, instance, target_node=target_node)
4696
      finally:
4697
        self.cfg.ReleaseDRBDMinors(instance.name)
4698
        raise
4699

    
4700
    cluster_name = self.cfg.GetClusterInfo().cluster_name
4701

    
4702
    errs = []
4703
    # activate, get path, copy the data over
4704
    for idx, disk in enumerate(instance.disks):
4705
      self.LogInfo("Copying data for disk %d", idx)
4706
      result = self.rpc.call_blockdev_assemble(target_node, disk,
4707
                                               instance.name, True)
4708
      if result.fail_msg:
4709
        self.LogWarning("Can't assemble newly created disk %d: %s",
4710
                        idx, result.fail_msg)
4711
        errs.append(result.fail_msg)
4712
        break
4713
      dev_path = result.payload
4714
      result = self.rpc.call_blockdev_export(source_node, disk,
4715
                                             target_node, dev_path,
4716
                                             cluster_name)
4717
      if result.fail_msg:
4718
        self.LogWarning("Can't copy data over for disk %d: %s",
4719
                        idx, result.fail_msg)
4720
        errs.append(result.fail_msg)
4721
        break
4722

    
4723
    if errs:
4724
      self.LogWarning("Some disks failed to copy, aborting")
4725
      try:
4726
        _RemoveDisks(self, instance, target_node=target_node)
4727
      finally:
4728
        self.cfg.ReleaseDRBDMinors(instance.name)
4729
        raise errors.OpExecError("Errors during disk copy: %s" %
4730
                                 (",".join(errs),))
4731

    
4732
    instance.primary_node = target_node
4733
    self.cfg.Update(instance, feedback_fn)
4734

    
4735
    self.LogInfo("Removing the disks on the original node")
4736
    _RemoveDisks(self, instance, target_node=source_node)
4737

    
4738
    # Only start the instance if it's marked as up
4739
    if instance.admin_up:
4740
      self.LogInfo("Starting instance %s on node %s",
4741
                   instance.name, target_node)
4742

    
4743
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4744
                                           ignore_secondaries=True)
4745
      if not disks_ok:
4746
        _ShutdownInstanceDisks(self, instance)
4747
        raise errors.OpExecError("Can't activate the instance's disks")
4748

    
4749
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4750
      msg = result.fail_msg
4751
      if msg:
4752
        _ShutdownInstanceDisks(self, instance)
4753
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4754
                                 (instance.name, target_node, msg))
4755

    
4756

    
4757
class LUMigrateNode(LogicalUnit):
4758
  """Migrate all instances from a node.
4759

4760
  """
4761
  HPATH = "node-migrate"
4762
  HTYPE = constants.HTYPE_NODE
4763
  _OP_REQP = ["node_name", "live"]
4764
  REQ_BGL = False
4765

    
4766
  def ExpandNames(self):
4767
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
4768
    if self.op.node_name is None:
4769
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
4770

    
4771
    self.needed_locks = {
4772
      locking.LEVEL_NODE: [self.op.node_name],
4773
      }
4774

    
4775
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4776

    
4777
    # Create tasklets for migrating instances for all instances on this node
4778
    names = []
4779
    tasklets = []
4780

    
4781
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
4782
      logging.debug("Migrating instance %s", inst.name)
4783
      names.append(inst.name)
4784

    
4785
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
4786

    
4787
    self.tasklets = tasklets
4788

    
4789
    # Declare instance locks
4790
    self.needed_locks[locking.LEVEL_INSTANCE] = names
4791

    
4792
  def DeclareLocks(self, level):
4793
    if level == locking.LEVEL_NODE:
4794
      self._LockInstancesNodes()
4795

    
4796
  def BuildHooksEnv(self):
4797
    """Build hooks env.
4798

4799
    This runs on the master, the primary and all the secondaries.
4800

4801
    """
4802
    env = {
4803
      "NODE_NAME": self.op.node_name,
4804
      }
4805

    
4806
    nl = [self.cfg.GetMasterNode()]
4807

    
4808
    return (env, nl, nl)
4809

    
4810

    
4811
class TLMigrateInstance(Tasklet):
4812
  def __init__(self, lu, instance_name, live, cleanup):
4813
    """Initializes this class.
4814

4815
    """
4816
    Tasklet.__init__(self, lu)
4817

    
4818
    # Parameters
4819
    self.instance_name = instance_name
4820
    self.live = live
4821
    self.cleanup = cleanup
4822

    
4823
  def CheckPrereq(self):
4824
    """Check prerequisites.
4825

4826
    This checks that the instance is in the cluster.
4827

4828
    """
4829
    instance = self.cfg.GetInstanceInfo(
4830
      self.cfg.ExpandInstanceName(self.instance_name))
4831
    if instance is None:
4832
      raise errors.OpPrereqError("Instance '%s' not known" %
4833
                                 self.instance_name)
4834

    
4835
    if instance.disk_template != constants.DT_DRBD8:
4836
      raise errors.OpPrereqError("Instance's disk layout is not"
4837
                                 " drbd8, cannot migrate.")
4838

    
4839
    secondary_nodes = instance.secondary_nodes
4840
    if not secondary_nodes:
4841
      raise errors.ConfigurationError("No secondary node but using"
4842
                                      " drbd8 disk template")
4843

    
4844
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
4845

    
4846
    target_node = secondary_nodes[0]
4847
    # check memory requirements on the secondary node
4848
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4849
                         instance.name, i_be[constants.BE_MEMORY],
4850
                         instance.hypervisor)
4851

    
4852
    # check bridge existence
4853
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4854

    
4855
    if not self.cleanup:
4856
      _CheckNodeNotDrained(self, target_node)
4857
      result = self.rpc.call_instance_migratable(instance.primary_node,
4858
                                                 instance)
4859
      result.Raise("Can't migrate, please use failover", prereq=True)
4860

    
4861
    self.instance = instance
4862

    
4863
  def _WaitUntilSync(self):
4864
    """Poll with custom rpc for disk sync.
4865

4866
    This uses our own step-based rpc call.
4867

4868
    """
4869
    self.feedback_fn("* wait until resync is done")
4870
    all_done = False
4871
    while not all_done:
4872
      all_done = True
4873
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4874
                                            self.nodes_ip,
4875
                                            self.instance.disks)
4876
      min_percent = 100
4877
      for node, nres in result.items():
4878
        nres.Raise("Cannot resync disks on node %s" % node)
4879
        node_done, node_percent = nres.payload
4880
        all_done = all_done and node_done
4881
        if node_percent is not None:
4882
          min_percent = min(min_percent, node_percent)
4883
      if not all_done:
4884
        if min_percent < 100:
4885
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
4886
        time.sleep(2)
4887

    
4888
  def _EnsureSecondary(self, node):
4889
    """Demote a node to secondary.
4890

4891
    """
4892
    self.feedback_fn("* switching node %s to secondary mode" % node)
4893

    
4894
    for dev in self.instance.disks:
4895
      self.cfg.SetDiskID(dev, node)
4896

    
4897
    result = self.rpc.call_blockdev_close(node, self.instance.name,
4898
                                          self.instance.disks)
4899
    result.Raise("Cannot change disk to secondary on node %s" % node)
4900

    
4901
  def _GoStandalone(self):
4902
    """Disconnect from the network.
4903

4904
    """
4905
    self.feedback_fn("* changing into standalone mode")
4906
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4907
                                               self.instance.disks)
4908
    for node, nres in result.items():
4909
      nres.Raise("Cannot disconnect disks on node %s" % node)
4910

    
4911
  def _GoReconnect(self, multimaster):
4912
    """Reconnect to the network.
4913

4914
    """
4915
    if multimaster:
4916
      msg = "dual-master"
4917
    else:
4918
      msg = "single-master"
4919
    self.feedback_fn("* changing disks into %s mode" % msg)
4920
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4921
                                           self.instance.disks,
4922
                                           self.instance.name, multimaster)
4923
    for node, nres in result.items():
4924
      nres.Raise("Cannot change disks config on node %s" % node)
4925

    
4926
  def _ExecCleanup(self):
4927
    """Try to cleanup after a failed migration.
4928

4929
    The cleanup is done by:
4930
      - check that the instance is running only on one node
4931
        (and update the config if needed)
4932
      - change disks on its secondary node to secondary
4933
      - wait until disks are fully synchronized
4934
      - disconnect from the network
4935
      - change disks into single-master mode
4936
      - wait again until disks are fully synchronized
4937

4938
    """
4939
    instance = self.instance
4940
    target_node = self.target_node
4941
    source_node = self.source_node
4942

    
4943
    # check running on only one node
4944
    self.feedback_fn("* checking where the instance actually runs"
4945
                     " (if this hangs, the hypervisor might be in"
4946
                     " a bad state)")
4947
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4948
    for node, result in ins_l.items():
4949
      result.Raise("Can't contact node %s" % node)
4950

    
4951
    runningon_source = instance.name in ins_l[source_node].payload
4952
    runningon_target = instance.name in ins_l[target_node].payload
4953

    
4954
    if runningon_source and runningon_target:
4955
      raise errors.OpExecError("Instance seems to be running on two nodes,"
4956
                               " or the hypervisor is confused. You will have"
4957
                               " to ensure manually that it runs only on one"
4958
                               " and restart this operation.")
4959

    
4960
    if not (runningon_source or runningon_target):
4961
      raise errors.OpExecError("Instance does not seem to be running at all."
4962
                               " In this case, it's safer to repair by"
4963
                               " running 'gnt-instance stop' to ensure disk"
4964
                               " shutdown, and then restarting it.")
4965

    
4966
    if runningon_target:
4967
      # the migration has actually succeeded, we need to update the config
4968
      self.feedback_fn("* instance running on secondary node (%s),"
4969
                       " updating config" % target_node)
4970
      instance.primary_node = target_node
4971
      self.cfg.Update(instance, self.feedback_fn)
4972
      demoted_node = source_node
4973
    else:
4974
      self.feedback_fn("* instance confirmed to be running on its"
4975
                       " primary node (%s)" % source_node)
4976
      demoted_node = target_node
4977

    
4978
    self._EnsureSecondary(demoted_node)
4979
    try:
4980
      self._WaitUntilSync()
4981
    except errors.OpExecError:
4982
      # we ignore here errors, since if the device is standalone, it
4983
      # won't be able to sync
4984
      pass
4985
    self._GoStandalone()
4986
    self._GoReconnect(False)
4987
    self._WaitUntilSync()
4988

    
4989
    self.feedback_fn("* done")
4990

    
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
5027
    """Migrate an instance.
5028

5029
    The migrate is done by:
5030
      - change the disks into dual-master mode
5031
      - wait until disks are fully synchronized again
5032
      - migrate the instance
5033
      - change disks on the new secondary node (the old primary) to secondary
5034
      - wait until disks are fully synchronized
5035
      - change disks into single-master mode
5036

5037
    """
5038
    instance = self.instance
5039
    target_node = self.target_node
5040
    source_node = self.source_node
5041

    
5042
    self.feedback_fn("* checking disk consistency between source and target")
5043
    for dev in instance.disks:
5044
      if not _CheckDiskConsistency(self, dev, target_node, False):
5045
        raise errors.OpExecError("Disk %s is degraded or not fully"
5046
                                 " synchronized on target node,"
5047
                                 " aborting migrate." % dev.iv_name)
5048

    
5049
    # First get the migration information from the remote node
5050
    result = self.rpc.call_migration_info(source_node, instance)
5051
    msg = result.fail_msg
5052
    if msg:
5053
      log_err = ("Failed fetching source migration information from %s: %s" %
5054
                 (source_node, msg))
5055
      logging.error(log_err)
5056
      raise errors.OpExecError(log_err)
5057

    
5058
    self.migration_info = migration_info = result.payload
5059

    
5060
    # Then switch the disks to master/master mode
5061
    self._EnsureSecondary(target_node)
5062
    self._GoStandalone()
5063
    self._GoReconnect(True)
5064
    self._WaitUntilSync()
5065

    
5066
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5067
    result = self.rpc.call_accept_instance(target_node,
5068
                                           instance,
5069
                                           migration_info,
5070
                                           self.nodes_ip[target_node])
5071

    
5072
    msg = result.fail_msg
5073
    if msg:
5074
      logging.error("Instance pre-migration failed, trying to revert"
5075
                    " disk status: %s", msg)
5076
      self._AbortMigration()
5077
      self._RevertDiskStatus()
5078
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5079
                               (instance.name, msg))
5080

    
5081
    self.feedback_fn("* migrating instance to %s" % target_node)
5082
    time.sleep(10)
5083
    result = self.rpc.call_instance_migrate(source_node, instance,
5084
                                            self.nodes_ip[target_node],
5085
                                            self.live)
5086
    msg = result.fail_msg
5087
    if msg:
5088
      logging.error("Instance migration failed, trying to revert"
5089
                    " disk status: %s", msg)
5090
      self._AbortMigration()
5091
      self._RevertDiskStatus()
5092
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5093
                               (instance.name, msg))
5094
    time.sleep(10)
5095

    
5096
    instance.primary_node = target_node
5097
    # distribute new instance config to the other nodes
5098
    self.cfg.Update(instance, self.feedback_fn)
5099

    
5100
    result = self.rpc.call_finalize_migration(target_node,
5101
                                              instance,
5102
                                              migration_info,
5103
                                              True)
5104
    msg = result.fail_msg
5105
    if msg:
5106
      logging.error("Instance migration succeeded, but finalization failed:"
5107
                    " %s" % msg)
5108
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5109
                               msg)
5110

    
5111
    self._EnsureSecondary(source_node)
5112
    self._WaitUntilSync()
5113
    self._GoStandalone()
5114
    self._GoReconnect(False)
5115
    self._WaitUntilSync()
5116

    
5117
    self.feedback_fn("* done")
5118

    
5119
  def Exec(self, feedback_fn):
5120
    """Perform the migration.
5121

5122
    """
5123
    feedback_fn("Migrating instance %s" % self.instance.name)
5124

    
5125
    self.feedback_fn = feedback_fn
5126

    
5127
    self.source_node = self.instance.primary_node
5128
    self.target_node = self.instance.secondary_nodes[0]
5129
    self.all_nodes = [self.source_node, self.target_node]
5130
    self.nodes_ip = {
5131
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5132
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5133
      }
5134

    
5135
    if self.cleanup:
5136
      return self._ExecCleanup()
5137
    else:
5138
      return self._ExecMigration()
5139

    
5140

    
5141
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


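# _GenerateDRBD8Branch (below) returns a single LD_DRBD8 disk object: its two
# children are LVs named from the passed-in 'names' pair (a data LV of the
# requested size plus a fixed 128 MB metadata LV), and its logical_id bundles
# the primary/secondary nodes, the allocated network port, both DRBD minors
# and a generated shared secret.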
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


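# For example, a two-disk DRBD8 request handled below ends up with LV name
# pairs like "<unique-id>.disk0_data"/"<unique-id>.disk0_meta" and
# "<unique-id>.disk1_data"/"<unique-id>.disk1_meta", while AllocateDRBDMinor
# is asked for minors in [primary, secondary, primary, secondary] order, so
# minors[idx*2]/minors[idx*2+1] belong to disk 'idx'.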
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


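# _GetInstanceInfoText simply tags disks with their owner; for an instance
# named "instance1.example.com" (an illustrative name) it returns
# "originstname+instance1.example.com", which _CreateDisks passes down as the
# extra metadata (LVM tag) for every block device it creates.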
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


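# In _CreateDisks below, f_create (and force_open) are True only on the
# primary node; on the other nodes the actual creation decision is delegated
# to each device's CreateOnSecondary() check inside _CreateBlockDev.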
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


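# Disk removal below is best-effort: failures on individual block devices or
# on the file storage directory are logged as warnings and only flip the
# boolean return value, they do not raise.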
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


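# Worked example for the size computation below (illustrative numbers): a
# DRBD8 instance with two disks of 1024 MB and 2048 MB needs
# (1024 + 128) + (2048 + 128) = 3328 MB of volume group space, while the
# diskless and file templates need no LVM space at all (None).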
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


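# A typical call site (sketch only, with illustrative argument names)
# validates the filled hypervisor parameters on every relevant node, e.g.:
#   _CheckHVParams(self, [pnode.name, snode.name], self.op.hypervisor,
#                  filled_hvp)
# Offline nodes are skipped; any other failure is surfaced via info.Raise().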
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
5462
  """Create an instance.
5463

5464
  """
5465
  HPATH = "instance-add"
5466
  HTYPE = constants.HTYPE_INSTANCE
5467
  _OP_REQP = ["instance_name", "disks", "disk_template",
5468
              "mode", "start",
5469
              "wait_for_sync", "ip_check", "nics",
5470
              "hvparams", "beparams"]
5471
  REQ_BGL = False
5472

    
5473
  def _ExpandNode(self, node):
5474
    """Expands and checks one node name.
5475

5476
    """
5477
    node_full = self.cfg.ExpandNodeName(node)
5478
    if node_full is None:
5479
      raise errors.OpPrereqError("Unknown node %s" % node)
5480
    return node_full
5481

    
5482
  def ExpandNames(self):
5483
    """ExpandNames for CreateInstance.
5484

5485
    Figure out the right locks for instance creation.
5486

5487
    """
5488
    self.needed_locks = {}
5489

    
5490
    # set optional parameters to none if they don't exist
5491
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5492
      if not hasattr(self.op, attr):
5493
        setattr(self.op, attr, None)
5494

    
5495
    # cheap checks, mostly valid constants given
5496

    
5497
    # verify creation mode
5498
    if self.op.mode not in (constants.INSTANCE_CREATE,
5499
                            constants.INSTANCE_IMPORT):
5500
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5501
                                 self.op.mode)
5502

    
5503
    # disk template and mirror node verification
5504
    if self.op.disk_template not in constants.DISK_TEMPLATES:
5505
      raise errors.OpPrereqError("Invalid disk template name")
5506

    
5507
    if self.op.hypervisor is None:
5508
      self.op.hypervisor = self.cfg.GetHypervisorType()
5509

    
5510
    cluster = self.cfg.GetClusterInfo()
5511
    enabled_hvs = cluster.enabled_hypervisors
5512
    if self.op.hypervisor not in enabled_hvs:
5513
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5514
                                 " cluster (%s)" % (self.op.hypervisor,
5515
                                  ",".join(enabled_hvs)))
5516

    
5517
    # check hypervisor parameter syntax (locally)
5518
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5519
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5520
                                  self.op.hvparams)
5521
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5522
    hv_type.CheckParameterSyntax(filled_hvp)
5523
    self.hv_full = filled_hvp
5524

    
5525
    # fill and remember the beparams dict
5526
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5527
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5528
                                    self.op.beparams)
5529

    
5530
    #### instance parameters check
5531

    
5532
    # instance name verification
5533
    hostname1 = utils.HostInfo(self.op.instance_name)
5534
    self.op.instance_name = instance_name = hostname1.name
5535

    
5536
    # this is just a preventive check, but someone might still add this
5537
    # instance in the meantime, and creation will fail at lock-add time
5538
    if instance_name in self.cfg.GetInstanceList():
5539
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5540
                                 instance_name)
5541

    
5542
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5543

    
5544
    # NIC buildup
5545
    self.nics = []
5546
    for idx, nic in enumerate(self.op.nics):
5547
      nic_mode_req = nic.get("mode", None)
5548
      nic_mode = nic_mode_req
5549
      if nic_mode is None:
5550
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5551

    
5552
      # in routed mode, for the first nic, the default ip is 'auto'
5553
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5554
        default_ip_mode = constants.VALUE_AUTO
5555
      else:
5556
        default_ip_mode = constants.VALUE_NONE
5557

    
5558
      # ip validity checks
5559
      ip = nic.get("ip", default_ip_mode)
5560
      if ip is None or ip.lower() == constants.VALUE_NONE:
5561
        nic_ip = None
5562
      elif ip.lower() == constants.VALUE_AUTO:
5563
        nic_ip = hostname1.ip
5564
      else:
5565
        if not utils.IsValidIP(ip):
5566
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5567
                                     " like a valid IP" % ip)
5568
        nic_ip = ip
5569

    
5570
      # TODO: check the ip for uniqueness !!
5571
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5572
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
5573

    
5574
      # MAC address verification
5575
      mac = nic.get("mac", constants.VALUE_AUTO)
5576
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5577
        if not utils.IsValidMac(mac.lower()):
5578
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5579
                                     mac)
5580
        else:
5581
          # or validate/reserve the current one
5582
          if self.cfg.IsMacInUse(mac):
5583
            raise errors.OpPrereqError("MAC address %s already in use"
5584
                                       " in cluster" % mac)
5585

    
5586
      # bridge verification
5587
      bridge = nic.get("bridge", None)
5588
      link = nic.get("link", None)
5589
      if bridge and link:
5590
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5591
                                   " at the same time")
5592
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5593
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
5594
      elif bridge:
5595
        link = bridge
5596

    
5597
      nicparams = {}
5598
      if nic_mode_req:
5599
        nicparams[constants.NIC_MODE] = nic_mode_req
5600
      if link:
5601
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")
      self.op.force_variant = getattr(self.op, "force_variant", False)

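  # Invoked from CheckPrereq when an iallocator is requested; it fills in
  # self.op.pnode (and self.op.snode when two nodes are required), so the
  # node checks later on treat allocator-chosen nodes exactly like
  # user-supplied ones.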
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                     src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)
    if not self.op.force_variant:
      _CheckOSVariant(result.payload, self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node")

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info))

    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes))

    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced")

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

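  # By this point CheckPrereq has filled in self.disks, self.target_node,
  # self.other_node and (when changing the secondary) self.new_node; Exec
  # below only dispatches on whether a new secondary node was requested.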
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (", ".join([str(i) for i in self.disks]), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (dev, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

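  # The helpers above (_CreateNewStorage, _CheckDevices, _RemoveOldStorage)
  # are shared by both replacement paths implemented below.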
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r" % (minors,))

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = node_name

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                node_name, True):
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node)
      vg_free = info.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node, vg_free, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


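# Illustrative sketch, for documentation only (not used by any LU): the
# per-node free space check performed by LUGrowDisk.CheckPrereq above,
# reduced to a plain dict.  "node_free" is assumed to map node names to
# their 'vg_free' payload in MiB; "amount" is the requested growth in MiB.
def _SketchCheckGrowSpace(node_free, amount):
  """Raise OpPrereqError unless every node has 'amount' MiB free in its VG."""
  for node, vg_free in node_free.items():
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on"
                                 " node %s" % node)
    if amount > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " %d MiB available, %d MiB required" %
                                 (node, vg_free, amount))

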
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


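# Illustrative sketch, for documentation only (not used by any LU): walking
# the per-disk dicts returned by LUQueryInstanceData.Exec above.  The
# "pstatus"/"sstatus" tuples follow the field order produced by
# _ComputeBlockdevStatus: (dev_path, major, minor, sync_percent,
# estimated_time, is_degraded, ldisk_status).
def _SketchDegradedDisks(disks):
  """Return the iv_names of disks whose primary-node status is degraded."""
  degraded = []
  for disk in disks:
    pstatus = disk["pstatus"]
    if pstatus is not None and pstatus[5]:
      degraded.append(disk["iv_name"])
    # recurse into the children (e.g. the LVs backing a DRBD device)
    degraded.extend(_SketchDegradedDisks(disk["children"]))
  return degraded

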
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or
            self.op.hvparams or self.op.beparams):
      raise errors.OpPrereqError("No changes submitted")

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index")
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing")
        try:
          size = int(size)
        except ValueError, err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err))
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk")

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time")

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index")
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          if not utils.IsValidMac(nic_mac):
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic")

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time")

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def _GetUpdatedParams(self, old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics)))
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic')
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None')
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC()
        else:
          # or validate/reserve the current one
          if self.cfg.IsMacInUse(nic_mac):
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances")
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance")
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg))
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)))

    return

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    cluster = self.cluster
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pnew:
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result


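# Illustrative sketch, for documentation only (not used by any LU), of the
# merge done by LUSetInstanceParams._GetUpdatedParams above, without the
# type enforcement step: explicit values override the per-instance dict,
# constants.VALUE_DEFAULT drops a per-instance override, and the "filled"
# dict is the defaults overlaid with what remains.  For example,
# old_params={"memory": 512}, update_dict={"memory": constants.VALUE_DEFAULT,
# "vcpus": 2} and default_values={"memory": 128, "vcpus": 1} would yield
# ({"vcpus": 2}, {"memory": 128, "vcpus": 2}).
def _SketchUpdatedParams(old_params, update_dict, default_values):
  """Return (new_params, filled_params) for the given update."""
  new_params = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if val == constants.VALUE_DEFAULT:
      new_params.pop(key, None)
    else:
      new_params[key] = val
  filled_params = objects.FillDict(default_values, new_params)
  return (new_params, filled_params)

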
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


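# Illustrative sketch, for documentation only (not used by any LU): consuming
# the node->(export-list) map returned by LUQueryExports.Exec above, where a
# value of False marks a node whose export list could not be fetched.
def _SketchNodesWithExport(exports_map, instance_name):
  """Return the nodes that hold an export for the given instance name."""
  return [node for (node, exports) in exports_map.items()
          if exports is not False and instance_name in exports]

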
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is a wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, src_node))

          # result.payload will be a snapshot of an lvm leaf of the one we
          # passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, src_node, msg)
            snap_disks.append(False)
          else:
            disk_id = (vgname, result.payload)
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=disk_id, physical_id=disk_id,
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

      finally:
        if self.op.shutdown and instance.admin_up:
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
        if dev:
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name, idx)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
          else:
            dresults.append(True)
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
          if msg:
            self.LogWarning("Could not remove snapshot for disk/%d from node"
                            " %s: %s", idx, src_node, msg)
        else:
          dresults.append(False)

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
                                             snap_disks)
      fin_resu = True
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)
        fin_resu = False

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal;
    # if we proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults


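# Illustrative sketch, for documentation only (not used by any LU):
# interpreting the (fin_resu, dresults) pair returned by
# LUExportInstance.Exec above; fin_resu tells whether the export could be
# finalized on the target node and dresults holds one boolean per instance
# disk.
def _SketchExportFullyOk(fin_resu, dresults):
  """Return True only if the export finalized and every disk was exported."""
  return bool(fin_resu) and False not in dresults

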
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


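# Illustrative sketch, for documentation only (not used by any LU), of the
# matching done by LUSearchTags.Exec above, on plain data instead of
# configuration objects: "tags_by_path" is assumed to map a path such as
# "/instances/foo" to the tags of that object.
def _SketchSearchTags(pattern, tags_by_path):
  """Return the (path, tag) pairs whose tag matches the given pattern."""
  regex = re.compile(pattern)
  results = []
  for path, tags in tags_by_path.items():
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return results

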
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target, feedback_fn)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target, feedback_fn)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
8202
  """IAllocator framework.
8203

8204
  An IAllocator instance has three sets of attributes:
8205
    - cfg that is needed to query the cluster
8206
    - input data (all members of the _KEYS class attribute are required)
8207
    - four buffer attributes (in|out_data|text), that represent the
8208
      input (to the external script) in text and data structure format,
8209
      and the output from it, again in two formats
8210
    - the result variables from the script (success, info, nodes) for
8211
      easy usage
8212

8213
  """
8214
  _ALLO_KEYS = [
8215
    "mem_size", "disks", "disk_template",
8216
    "os", "tags", "nics", "vcpus", "hypervisor",
8217
    ]
8218
  _RELO_KEYS = [
8219
    "relocate_from",
8220
    ]
8221

    
8222
  def __init__(self, cfg, rpc, mode, name, **kwargs):
8223
    self.cfg = cfg
8224
    self.rpc = rpc
8225
    # init buffer variables
8226
    self.in_text = self.out_text = self.in_data = self.out_data = None
8227
    # init all input fields so that pylint is happy
8228
    self.mode = mode
8229
    self.name = name
8230
    self.mem_size = self.disks = self.disk_template = None
8231
    self.os = self.tags = self.nics = self.vcpus = None
8232
    self.hypervisor = None
8233
    self.relocate_from = None
8234
    # computed fields
8235
    self.required_nodes = None
8236
    # init result fields
8237
    self.success = self.info = self.nodes = None
8238
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8239
      keyset = self._ALLO_KEYS
8240
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8241
      keyset = self._RELO_KEYS
8242
    else:
8243
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8244
                                   " IAllocator" % self.mode)
8245
    for key in kwargs:
8246
      if key not in keyset:
8247
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
8248
                                     " IAllocator" % key)
8249
      setattr(self, key, kwargs[key])
8250
    for key in keyset:
8251
      if key not in kwargs:
8252
        raise errors.ProgrammerError("Missing input parameter '%s' to"
8253
                                     " IAllocator" % key)
8254
    self._BuildInputData()
8255

    
8256
  def _ComputeClusterData(self):
8257
    """Compute the generic allocator input data.
8258

8259
    This is the data that is independent of the actual operation.
8260

8261
    """
8262
    cfg = self.cfg
8263
    cluster_info = cfg.GetClusterInfo()
8264
    # cluster data
8265
    data = {
8266
      "version": constants.IALLOCATOR_VERSION,
8267
      "cluster_name": cfg.GetClusterName(),
8268
      "cluster_tags": list(cluster_info.GetTags()),
8269
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8270
      # we don't have job IDs
8271
      }
8272
    iinfo = cfg.GetAllInstancesInfo().values()
8273
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8274

    
8275
    # node data
8276
    node_results = {}
8277
    node_list = cfg.GetNodeList()
8278

    
8279
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8280
      hypervisor_name = self.hypervisor
8281
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic data: node totals and memory used by primary instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

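  # Illustrative example of the structure built above (node names and
  # values below are hypothetical): each entry of data["nodes"] looks like
  #   "node1.example.com": {
  #     "tags": [], "primary_ip": "192.0.2.10",
  #     "secondary_ip": "198.51.100.10", "offline": False, "drained": False,
  #     "master_candidate": True, "total_memory": 4096,
  #     "reserved_memory": 512, "free_memory": 2048, "total_disk": 102400,
  #     "free_disk": 51200, "total_cpus": 4, "i_pri_memory": 1024,
  #     "i_pri_up_memory": 512,
  #     }
  # while each entry of data["instances"] carries "tags", "admin_up",
  # "vcpus", "memory", "os", "nodes", "nics", "disks", "disk_template",
  # "hypervisor" and the derived "disk_space_total".
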
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

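  # Example of the "allocate" request built above (all values are
  # hypothetical; the nics follow the mac/ip/bridge shape that
  # LUTestAllocator.CheckPrereq validates):
  #   {"type": "allocate", "name": "inst1.example.com",
  #    "disk_template": "drbd", "tags": [], "os": "debian-etch",
  #    "vcpus": 1, "memory": 512, "disks": [{"size": 1024, "mode": "w"}],
  #    "disk_space_total": 1152,
  #    "nics": [{"mac": "aa:00:00:00:00:01", "ip": None, "bridge": None}],
  #    "required_nodes": 2}
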
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

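  # Example of the "relocate" request built above (name, disk space and
  # source node are hypothetical):
  #   {"type": "relocate", "name": "inst1.example.com",
  #    "disk_space_total": 1152, "required_nodes": 1,
  #    "relocate_from": ["node2.example.com"]}
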
  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

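  # Usage sketch from within a LogicalUnit (illustrative only; names such
  # as lu, instance_name, old_secondary and allocator_name are
  # hypothetical):
  #   ial = IAllocator(lu.cfg, lu.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name=instance_name,
  #                    relocate_from=[old_secondary])
  #   ial.Run(allocator_name)
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info)
  #   target_nodes = ial.nodes
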
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


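# A well-formed allocator reply, as required by IAllocator._ValidateResult,
# is a serialized dict with at least the keys "success", "info" and "nodes"
# (the latter a list), for example (values are illustrative only):
#   {"success": True, "info": "allocation successful",
#    "nodes": ["node3.example.com", "node4.example.com"]}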
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

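  # In allocate mode, CheckPrereq above accepts parameter shapes like the
  # following (values are illustrative only):
  #   nics:  [{"mac": "aa:00:00:00:00:01", "ip": None, "bridge": "xen-br0"}]
  #   disks: [{"size": 1024, "mode": "w"}]
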
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result