#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which will then be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this is
    handled by the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If no nodes are to be returned, an empty list (and not None) should
    be used.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to lock only some instances' nodes,
    or to lock only primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
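

# A minimal sketch (illustrative only, not part of the original module) of a
# concurrent LU following the rules documented in LogicalUnit above; the
# opcode field and class name are hypothetical:
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = None                     # no hooks, so BuildHooksEnv not needed
#     HTYPE = None
#     _OP_REQP = ["instance_name"]     # checked in LogicalUnit.__init__
#     REQ_BGL = False                  # concurrent LU, must declare its locks
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       # Node locks are computed later, once the instance lock is held
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Instance %s checked" % self.instance.name)
#       return self.instance.name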


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
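

# An illustrative sketch (not part of the original module) of how an LU can be
# composed of tasklets, relying on the loops in LogicalUnit.CheckPrereq and
# LogicalUnit.Exec above; the class and attribute names are hypothetical:
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Would operate on %s" % self.instance.name)
#
#   # In the owning LU's ExpandNames, after the locks have been declared:
#   #   self.tasklets = [_ExampleTasklet(self, name) for name in names]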


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
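

# For illustration (not part of the original source): an instance
# 'inst1.example.com' with one NIC in bridged mode and one disk yields, among
# others, the following keys (the hooks runner later handles the 'GANETI_'
# prefixing mentioned in BuildHooksEnv):
#
#   OP_TARGET=inst1.example.com        INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_STATUS=up                 INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=<mode>          INSTANCE_NIC0_BRIDGE=<link, bridged only>
#   INSTANCE_DISK_COUNT=1              INSTANCE_DISK0_SIZE=<size>
#   INSTANCE_DISK0_MODE=<mode>         INSTANCE_HYPERVISOR=<hypervisor>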


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
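

# Worked example (illustrative): with candidate_pool_size = 10 and
# GetMasterCandidateStats() reporting mc_now = 3 and mc_should = 3, the node
# being added raises the target to min(3 + 1, 10) = 4; since 3 < 4, the node
# should promote itself to master candidate.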


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant")

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant")
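

# Illustrative example (not from the original source): if an OS object lists
# supported_variants = ["lenny", "squeeze"], the user-supplied name
# "debootstrap+squeeze" is accepted (variant "squeeze"), while a bare
# "debootstrap" is rejected because it carries no "+variant" suffix.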


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)
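
  # For illustration (not part of the original source): a failed LVM check on
  # node1.example.com would be reported through feedback_fn as
  #   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  # when the opcode sets error_codes, and as
  #   - ERROR: node node1.example.com: unable to check volume groups
  # otherwise.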

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in the form of
        minor: (instance, must_exist), which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if (not node == node_current):
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies, we can still start all the instances
    it was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to, should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

1211
  def CheckPrereq(self):
1212
    """Check prerequisites.
1213

1214
    Transform the list of checks we're going to skip into a set and check that
1215
    all its members are valid.
1216

1217
    """
1218
    self.skip_set = frozenset(self.op.skip_checks)
1219
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1220
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1221

    
1222
  def BuildHooksEnv(self):
1223
    """Build hooks env.
1224

1225
    Cluster-Verify hooks just ran in the post phase and their failure makes
1226
    the output be logged in the verify output and the verification to fail.
1227

1228
    """
1229
    all_nodes = self.cfg.GetNodeList()
1230
    env = {
1231
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1232
      }
1233
    for node in self.cfg.GetAllNodesInfo().values():
1234
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1235

    
1236
    return env, [], all_nodes
1237

    
1238
  def Exec(self, feedback_fn):
1239
    """Verify integrity of cluster, performing various test on nodes.
1240

1241
    """
1242
    self.bad = False
1243
    _ErrorIf = self._ErrorIf
1244
    verbose = self.op.verbose
1245
    self._feedback_fn = feedback_fn
1246
    feedback_fn("* Verifying global settings")
1247
    for msg in self.cfg.VerifyConfig():
1248
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1249

    
1250
    vg_name = self.cfg.GetVGName()
1251
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1252
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1253
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1254
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1255
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1256
                        for iname in instancelist)
1257
    i_non_redundant = [] # Non redundant instances
1258
    i_non_a_balanced = [] # Non auto-balanced instances
1259
    n_offline = [] # List of offline nodes
1260
    n_drained = [] # List of nodes being drained
1261
    node_volume = {}
1262
    node_instance = {}
1263
    node_info = {}
1264
    instance_cfg = {}
1265

    
1266
    # FIXME: verify OS list
1267
    # do local checksums
1268
    master_files = [constants.CLUSTER_CONF_FILE]
1269

    
1270
    file_names = ssconf.SimpleStore().GetFileList()
1271
    file_names.append(constants.SSL_CERT_FILE)
1272
    file_names.append(constants.RAPI_CERT_FILE)
1273
    file_names.extend(master_files)
1274

    
1275
    local_checksums = utils.FingerprintFiles(file_names)
1276

    
1277
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1278
    node_verify_param = {
1279
      constants.NV_FILELIST: file_names,
1280
      constants.NV_NODELIST: [node.name for node in nodeinfo
1281
                              if not node.offline],
1282
      constants.NV_HYPERVISOR: hypervisors,
1283
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1284
                                  node.secondary_ip) for node in nodeinfo
1285
                                 if not node.offline],
1286
      constants.NV_INSTANCELIST: hypervisors,
1287
      constants.NV_VERSION: None,
1288
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1289
      constants.NV_NODESETUP: None,
1290
      }
1291
    if vg_name is not None:
1292
      node_verify_param[constants.NV_VGLIST] = None
1293
      node_verify_param[constants.NV_LVLIST] = vg_name
1294
      node_verify_param[constants.NV_DRBDLIST] = None
1295
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1296
                                           self.cfg.GetClusterName())
1297

    
1298
    cluster = self.cfg.GetClusterInfo()
1299
    master_node = self.cfg.GetMasterNode()
1300
    all_drbd_map = self.cfg.ComputeDRBDMap()
1301

    
1302
    feedback_fn("* Verifying node status")
1303
    for node_i in nodeinfo:
1304
      node = node_i.name
1305

    
1306
      if node_i.offline:
1307
        if verbose:
1308
          feedback_fn("* Skipping offline node %s" % (node,))
1309
        n_offline.append(node)
1310
        continue
1311

    
1312
      if node == master_node:
1313
        ntype = "master"
1314
      elif node_i.master_candidate:
1315
        ntype = "master candidate"
1316
      elif node_i.drained:
1317
        ntype = "drained"
1318
        n_drained.append(node)
1319
      else:
1320
        ntype = "regular"
1321
      if verbose:
1322
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1323

    
1324
      msg = all_nvinfo[node].fail_msg
1325
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1326
      if msg:
1327
        continue
1328

    
1329
      nresult = all_nvinfo[node].payload
1330
      node_drbd = {}
1331
      for minor, instance in all_drbd_map[node].items():
1332
        test = instance not in instanceinfo
1333
        _ErrorIf(test, self.ECLUSTERCFG, None,
1334
                 "ghost instance '%s' in temporary DRBD map", instance)
1335
          # ghost instance should not be running, but otherwise we
1336
          # don't give double warnings (both ghost instance and
1337
          # unallocated minor in use)
1338
        if test:
1339
          node_drbd[minor] = (instance, False)
1340
        else:
1341
          instance = instanceinfo[instance]
1342
          node_drbd[minor] = (instance.name, instance.admin_up)
1343
      self._VerifyNode(node_i, file_names, local_checksums,
1344
                       nresult, master_files, node_drbd, vg_name)
1345

    
1346
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1347
      if vg_name is None:
1348
        node_volume[node] = {}
1349
      elif isinstance(lvdata, basestring):
1350
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1351
                 utils.SafeEncode(lvdata))
1352
        node_volume[node] = {}
1353
      elif not isinstance(lvdata, dict):
1354
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1355
        continue
1356
      else:
1357
        node_volume[node] = lvdata
1358

    
1359
      # node_instance
1360
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1361
      test = not isinstance(idata, list)
1362
      _ErrorIf(test, self.ENODEHV, node,
1363
               "rpc call to node failed (instancelist)")
1364
      if test:
1365
        continue
1366

    
1367
      node_instance[node] = idata
1368

    
1369
      # node_info
1370
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1371
      test = not isinstance(nodeinfo, dict)
1372
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1373
      if test:
1374
        continue
1375

    
1376
      try:
1377
        node_info[node] = {
1378
          "mfree": int(nodeinfo['memory_free']),
1379
          "pinst": [],
1380
          "sinst": [],
1381
          # dictionary holding all instances this node is secondary for,
1382
          # grouped by their primary node. Each key is a cluster node, and each
1383
          # value is a list of instances which have the key as primary and the
1384
          # current node as secondary.  this is handy to calculate N+1 memory
1385
          # availability if you can only failover from a primary to its
1386
          # secondary.
1387
          "sinst-by-pnode": {},
1388
        }
1389
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes", code="WARNING")

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               ", ".join(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        show_node_header = True
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if test:
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 1

      return lu_result


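# Note (added comment, hypothetical values): the per-node hook results handled
# by LUVerifyCluster.HooksCallBack above carry a payload of
# (script, status, output) tuples, for example
#   [("10-check-foo", constants.HKR_SUCCESS, ""),
#    ("20-check-bar", constants.HKR_FAIL, "bar is missing")]
# and every HKR_FAIL entry is reported to the user and overrides the LU result.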
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


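# Illustrative sketch (added comment, hypothetical names): on a cluster where
# node "node1" failed the LV listing RPC, instance "inst2" has an inactive
# volume and instance "inst1" is missing a backing volume on "node2",
# LUVerifyDisks.Exec above would return roughly
#   ({"node1": "rpc error ..."},
#    ["inst2"],
#    {"inst1": [("node2", "xenvg/abc.disk0_data")]})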
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


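# Illustrative note (added comment): call_blockdev_getsizes reports sizes in
# bytes, which is why LURepairDiskSizes.Exec above shifts each value right by
# 20 bits before comparing it with the configured disk.size in mebibytes,
# e.g. (hypothetical value) 10737418240 >> 20 == 10240.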
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


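# Illustrative sketch (added comment, hypothetical objects): for a DRBD8 disk
# backed by two plain LVs, _RecursiveCheckIfLVMBased above returns True
# because the recursion reaches a child whose dev_type is LD_LV:
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8,
#                       children=[objects.Disk(dev_type=constants.LD_LV),
#                                 objects.Disk(dev_type=constants.LD_LV)])
#   _RecursiveCheckIfLVMBased(drbd)  # -> True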
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters.

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err))
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed")

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist")

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus))

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member")
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" % ", ".join(invalid_hvs))
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    self.cfg.Update(self.cluster, feedback_fn)


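# Illustrative note (added comment, hypothetical values): objects.FillDict as
# used in LUSetClusterParams.CheckPrereq above overlays the submitted changes
# on the cluster-wide defaults, so a partial update keeps untouched keys:
#   objects.FillDict({"memory": 128, "auto_balance": True}, {"memory": 512})
#   # -> {"memory": 512, "auto_balance": True}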
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.HMAC_CLUSTER_KEY,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


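# Usage note (added comment): callers only need a logical unit exposing
# working cfg/rpc/proc attributes; for instance, LUAddNode.Exec further down
# calls _RedistributeAncillaryFiles(self, additional_nodes=[node]) so the
# ancillary files reach a node that is being added before it shows up in the
# node list kept in the configuration.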
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False, unlock=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


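# Illustrative note (added comment, hypothetical values): each mstat entry
# consumed by _WaitForSync above is a per-disk mirror status; while DRBD is
# still resyncing it might report sync_percent=42.5 and estimated_time=300,
# and once the resync finishes sync_percent becomes None, which is what lets
# the polling loop terminate.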
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary.

    @param node_list: a list with the names of all nodes
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(valid_nodes, node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          valid = valid and osl and osl[0][1]
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


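# Illustrative note (added comment, hypothetical values): when "variants" is
# requested, LUDiagnoseOS.Exec above intersects the variant lists reported per
# node, so an OS exporting ["lenny", "squeeze"] on one node and ["squeeze"] on
# another is reported with variants == ["squeeze"]; a single invalid
# definition anywhere resets variants to None and marks the OS as not valid.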
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    if self.op.node_name in all_nodes:
      all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      instancelist = self.cfg.GetInstanceList()

      for instance_name in instancelist:
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


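# Illustrative note (added comment, hypothetical values): a node query with
# output_fields=["name", "role", "pinst_cnt"] yields one row per node from
# LUQueryNodes.Exec above, e.g.
#   [["node1.example.com", "M", 3], ["node2.example.com", "R", 1]]
# where the role letters stand for Master, master Candidate, Drained, Offline
# and Regular, in that order of precedence.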
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)

    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*dynamic_fields),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node as it's only known to the LU
    while "node" in fields:
      fields.remove("node")

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = node_name

    storage_type = self.op.storage_type
    if storage_type not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)))

  def Exec(self, feedback_fn):
    """Modifies a storage unit on the given node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


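# Illustrative note (added comment, assumption about the constants involved):
# for LVM physical volumes, constants.MODIFIABLE_STORAGE_FIELDS is expected to
# allow only the allocatable flag, so a typical opcode handled by
# LUModifyNodeStorage above would carry something like
#   changes={constants.SF_ALLOCATABLE: False}
# to stop new allocations on that physical volume.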
class LUAddNode(LogicalUnit):
2764
  """Logical unit for adding node to the cluster.
2765

2766
  """
2767
  HPATH = "node-add"
2768
  HTYPE = constants.HTYPE_NODE
2769
  _OP_REQP = ["node_name"]
2770

    
2771
  def BuildHooksEnv(self):
2772
    """Build hooks env.
2773

2774
    This will run on all nodes before, and on all nodes + the new node after.
2775

2776
    """
2777
    env = {
2778
      "OP_TARGET": self.op.node_name,
2779
      "NODE_NAME": self.op.node_name,
2780
      "NODE_PIP": self.op.primary_ip,
2781
      "NODE_SIP": self.op.secondary_ip,
2782
      }
2783
    nodes_0 = self.cfg.GetNodeList()
2784
    nodes_1 = nodes_0 + [self.op.node_name, ]
2785
    return env, nodes_0, nodes_1
2786

    
2787
  def CheckPrereq(self):
2788
    """Check prerequisites.
2789

2790
    This checks:
2791
     - the new node is not already in the config
2792
     - it is resolvable
2793
     - its parameters (single/dual homed) matches the cluster
2794

2795
    Any errors are signaled by raising errors.OpPrereqError.
2796

2797
    """
2798
    node_name = self.op.node_name
2799
    cfg = self.cfg
2800

    
2801
    dns_data = utils.HostInfo(node_name)
2802

    
2803
    node = dns_data.name
2804
    primary_ip = self.op.primary_ip = dns_data.ip
2805
    secondary_ip = getattr(self.op, "secondary_ip", None)
2806
    if secondary_ip is None:
2807
      secondary_ip = primary_ip
2808
    if not utils.IsValidIP(secondary_ip):
2809
      raise errors.OpPrereqError("Invalid secondary IP given")
2810
    self.op.secondary_ip = secondary_ip
2811

    
2812
    node_list = cfg.GetNodeList()
2813
    if not self.op.readd and node in node_list:
2814
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2815
                                 node)
2816
    elif self.op.readd and node not in node_list:
2817
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2818

    
2819
    for existing_node_name in node_list:
2820
      existing_node = cfg.GetNodeInfo(existing_node_name)
2821

    
2822
      if self.op.readd and node == existing_node_name:
2823
        if (existing_node.primary_ip != primary_ip or
2824
            existing_node.secondary_ip != secondary_ip):
2825
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2826
                                     " address configuration as before")
2827
        continue
2828

    
2829
      if (existing_node.primary_ip == primary_ip or
2830
          existing_node.secondary_ip == primary_ip or
2831
          existing_node.primary_ip == secondary_ip or
2832
          existing_node.secondary_ip == secondary_ip):
2833
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2834
                                   " existing node %s" % existing_node.name)
2835

    
2836
    # check that the type of the node (single versus dual homed) is the
2837
    # same as for the master
2838
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2839
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2840
    newbie_singlehomed = secondary_ip == primary_ip
2841
    if master_singlehomed != newbie_singlehomed:
2842
      if master_singlehomed:
2843
        raise errors.OpPrereqError("The master has no private ip but the"
2844
                                   " new node has one")
2845
      else:
2846
        raise errors.OpPrereqError("The master has a private ip but the"
2847
                                   " new node doesn't have one")
2848

    
2849
    # checks reachability
2850
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2851
      raise errors.OpPrereqError("Node not reachable by ping")
2852

    
2853
    if not newbie_singlehomed:
2854
      # check reachability from my secondary ip to newbie's secondary ip
2855
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2856
                           source=myself.secondary_ip):
2857
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2858
                                   " based ping to noded port")
2859

    
2860
    if self.op.readd:
2861
      exceptions = [node]
2862
    else:
2863
      exceptions = []
2864

    
2865
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
2866

    
2867
    if self.op.readd:
2868
      self.new_node = self.cfg.GetNodeInfo(node)
2869
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2870
    else:
2871
      self.new_node = objects.Node(name=node,
2872
                                   primary_ip=primary_ip,
2873
                                   secondary_ip=secondary_ip,
2874
                                   master_candidate=self.master_candidate,
2875
                                   offline=False, drained=False)
2876

    
2877
  def Exec(self, feedback_fn):
2878
    """Adds the new node to the cluster.
2879

2880
    """
2881
    new_node = self.new_node
2882
    node = new_node.name
2883

    
2884
    # for re-adds, reset the offline/drained/master-candidate flags;
2885
    # we need to reset here, otherwise offline would prevent RPC calls
2886
    # later in the procedure; this also means that if the re-add
2887
    # fails, we are left with a non-offlined, broken node
2888
    if self.op.readd:
2889
      new_node.drained = new_node.offline = False
2890
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2891
      # if we demote the node, we do cleanup later in the procedure
2892
      new_node.master_candidate = self.master_candidate
2893

    
2894
    # notify the user about any possible mc promotion
2895
    if new_node.master_candidate:
2896
      self.LogInfo("Node will be a master candidate")
2897

    
2898
    # check connectivity
2899
    result = self.rpc.call_version([node])[node]
2900
    result.Raise("Can't get version information from node %s" % node)
2901
    if constants.PROTOCOL_VERSION == result.payload:
2902
      logging.info("Communication to node %s fine, sw version %s match",
2903
                   node, result.payload)
2904
    else:
2905
      raise errors.OpExecError("Version mismatch master version %s,"
2906
                               " node version %s" %
2907
                               (constants.PROTOCOL_VERSION, result.payload))
2908

    
2909
    # setup ssh on node
2910
    if self.cfg.GetClusterInfo().modify_ssh_setup:
2911
      logging.info("Copy ssh key to node %s", node)
2912
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2913
      keyarray = []
2914
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2915
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2916
                  priv_key, pub_key]
2917

    
2918
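      # Note: the order of keyfiles matters; the positional call_node_add
      # RPC below expects the host DSA private/public keys, the host RSA
      # private/public keys and the GANETI_RUNAS user's private/public SSH
      # keys in exactly this sequence.
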
      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node)


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification")
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time")

  def ExpandNames(self):
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl
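    # The (env, nl, nl) tuple above follows the LogicalUnit hooks
    # convention of (hook environment, nodes running the pre-hook, nodes
    # running the post-hook); here both phases run on the master and on
    # the node being modified.
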
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the requested flag changes are consistent with the
    node's current state and with the cluster's master candidate
    requirements.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover")

    # Boolean value that tells us whether we're offlining or draining the node
    offline_or_drain = self.op.offline == True or self.op.drained == True
    deoffline_or_drain = self.op.offline == False or self.op.drained == False

    if (node.master_candidate and
        (self.op.master_candidate == False or offline_or_drain)):
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
      if mc_now <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, mc_now-1))
        # Only allow forcing the operation if it's an offline/drain operation,
        # and we could not possibly promote more nodes.
        # FIXME: this can still lead to issues if in any way another node which
        # could be promoted appears in the meantime.
        if self.op.force and offline_or_drain and mc_should == mc_max:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (deoffline_or_drain and not offline_or_drain and not
        self.op.master_candidate == True):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result

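# Illustrative mapping (assuming the standard gnt-node CLI syntax): a
# command such as "gnt-node modify --offline yes node1.example.com" (a
# placeholder node name) arrives here with offline=True, which also
# triggers the auto-demotion from master candidate seen in
# LUSetNodeParams.Exec above.
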
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set")

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequsites needed for this LU.
3180

3181
    """
3182
    pass
3183

    
3184
  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")
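  # Output fields outside _FIELDS_STATIC are rejected by the
  # _CheckOutputFields call in ExpandNames below, so callers can only ask
  # for the four values listed above.
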
  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
3253
        return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3254
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a pair (disks_ok, device_info), where device_info is a list
      of (host, instance_visible_name, node_visible_name) triples with
      the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
3402
  """Shutdown an instance's disks.
3403

3404
  """
3405
  _OP_REQP = ["instance_name"]
3406
  REQ_BGL = False
3407

    
3408
  def ExpandNames(self):
3409
    self._ExpandAndLockInstance()
3410
    self.needed_locks[locking.LEVEL_NODE] = []
3411
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3412

    
3413
  def DeclareLocks(self, level):
3414
    if level == locking.LEVEL_NODE:
3415
      self._LockInstancesNodes()
3416

    
3417
  def CheckPrereq(self):
3418
    """Check prerequisites.
3419

3420
    This checks that the instance is in the cluster.
3421

3422
    """
3423
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3424
    assert self.instance is not None, \
3425
      "Cannot retrieve locked instance %s" % self.op.instance_name
3426

    
3427
  def Exec(self, feedback_fn):
3428
    """Deactivate the disks
3429

3430
    """
3431
    instance = self.instance
3432
    _SafeShutdownInstanceDisks(self, instance)
3433

    
3434

    
3435
def _SafeShutdownInstanceDisks(lu, instance):
3436
  """Shutdown block devices of an instance.
3437

3438
  This function checks if an instance is running, before calling
3439
  _ShutdownInstanceDisks.
3440

3441
  """
3442
  pnode = instance.primary_node
3443
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3444
  ins_l.Raise("Can't contact node %s" % pnode)
3445

    
3446
  if instance.name in ins_l.payload:
3447
    raise errors.OpExecError("Instance is running, can't shutdown"
3448
                             " block devices.")
3449

    
3450
  _ShutdownInstanceDisks(lu, instance)
3451

    
3452

    
3453
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3454
  """Shutdown block devices of an instance.
3455

3456
  This does the shutdown on all nodes of the instance.
3457

3458
  If ignore_primary is false, errors on the primary node are
  not ignored.
3460

3461
  """
3462
  all_result = True
3463
  for disk in instance.disks:
3464
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3465
      lu.cfg.SetDiskID(top_disk, node)
3466
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3467
      msg = result.fail_msg
3468
      if msg:
3469
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3470
                      disk.iv_name, node, msg)
3471
        if not ignore_primary or node != instance.primary_node:
3472
          all_result = False
3473
  return all_result
3474
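# Note that _ShutdownInstanceDisks only warns on per-node failures and
# returns an aggregate boolean; error paths such as _StartInstanceDisks
# above rely on this to shut the disks down again after a failed assembly
# without masking the original error.
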

    
3475

    
3476
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3477
  """Checks if a node has enough free memory.
3478

3479
  This function check if a given node has the needed amount of free
3480
  memory. In case the node has less memory or we cannot get the
3481
  information from the node, this function raise an OpPrereqError
3482
  exception.
3483

3484
  @type lu: C{LogicalUnit}
3485
  @param lu: a logical unit from which we get configuration data
3486
  @type node: C{str}
3487
  @param node: the node to check
3488
  @type reason: C{str}
3489
  @param reason: string to use in the error message
3490
  @type requested: C{int}
3491
  @param requested: the amount of memory in MiB to check for
3492
  @type hypervisor_name: C{str}
3493
  @param hypervisor_name: the hypervisor to ask for memory stats
3494
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3495
      we cannot check the node
3496

3497
  """
3498
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3499
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3500
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3501
  if not isinstance(free_mem, int):
3502
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3503
                               " was '%s'" % (node, free_mem))
3504
  if requested > free_mem:
3505
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3506
                               " needed %s MiB, available %s MiB" %
3507
                               (node, reason, requested, free_mem))
3508
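# Illustrative call of _CheckNodeFreeMemory (hypothetical names and values):
# before failing over an instance one would check the target node, e.g.
#   _CheckNodeFreeMemory(self, target_node, "failing over instance inst1",
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# where bep is the instance's filled beparams dict.
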

    
3509

    
3510
class LUStartupInstance(LogicalUnit):
3511
  """Starts an instance.
3512

3513
  """
3514
  HPATH = "instance-start"
3515
  HTYPE = constants.HTYPE_INSTANCE
3516
  _OP_REQP = ["instance_name", "force"]
3517
  REQ_BGL = False
3518

    
3519
  def ExpandNames(self):
3520
    self._ExpandAndLockInstance()
3521

    
3522
  def BuildHooksEnv(self):
3523
    """Build hooks env.
3524

3525
    This runs on master, primary and secondary nodes of the instance.
3526

3527
    """
3528
    env = {
3529
      "FORCE": self.op.force,
3530
      }
3531
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3532
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3533
    return env, nl, nl
3534

    
3535
  def CheckPrereq(self):
3536
    """Check prerequisites.
3537

3538
    This checks that the instance is in the cluster.
3539

3540
    """
3541
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3542
    assert self.instance is not None, \
3543
      "Cannot retrieve locked instance %s" % self.op.instance_name
3544

    
3545
    # extra beparams
3546
    self.beparams = getattr(self.op, "beparams", {})
3547
    if self.beparams:
3548
      if not isinstance(self.beparams, dict):
3549
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3550
                                   " dict" % (type(self.beparams), ))
3551
      # fill the beparams dict
3552
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3553
      self.op.beparams = self.beparams
3554

    
3555
    # extra hvparams
3556
    self.hvparams = getattr(self.op, "hvparams", {})
3557
    if self.hvparams:
3558
      if not isinstance(self.hvparams, dict):
3559
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3560
                                   " dict" % (type(self.hvparams), ))
3561

    
3562
      # check hypervisor parameter syntax (locally)
3563
      cluster = self.cfg.GetClusterInfo()
3564
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3565
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3566
                                    instance.hvparams)
3567
      filled_hvp.update(self.hvparams)
3568
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3569
      hv_type.CheckParameterSyntax(filled_hvp)
3570
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3571
      self.op.hvparams = self.hvparams
3572

    
3573
    _CheckNodeOnline(self, instance.primary_node)
3574

    
3575
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3576
    # check bridges existence
3577
    _CheckInstanceBridgesExist(self, instance)
3578

    
3579
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3580
                                              instance.name,
3581
                                              instance.hypervisor)
3582
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3583
                      prereq=True)
3584
    if not remote_info.payload: # not running already
3585
      _CheckNodeFreeMemory(self, instance.primary_node,
3586
                           "starting instance %s" % instance.name,
3587
                           bep[constants.BE_MEMORY], instance.hypervisor)
3588

    
3589
  def Exec(self, feedback_fn):
3590
    """Start the instance.
3591

3592
    """
3593
    instance = self.instance
3594
    force = self.op.force
3595

    
3596
    self.cfg.MarkInstanceUp(instance.name)
3597

    
3598
    node_current = instance.primary_node
3599

    
3600
    _StartInstanceDisks(self, instance, force)
3601

    
3602
    result = self.rpc.call_instance_start(node_current, instance,
3603
                                          self.hvparams, self.beparams)
3604
    msg = result.fail_msg
3605
    if msg:
3606
      _ShutdownInstanceDisks(self, instance)
3607
      raise errors.OpExecError("Could not start instance: %s" % msg)
3608

    
3609

    
3610
class LURebootInstance(LogicalUnit):
3611
  """Reboot an instance.
3612

3613
  """
3614
  HPATH = "instance-reboot"
3615
  HTYPE = constants.HTYPE_INSTANCE
3616
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3617
  REQ_BGL = False
3618

    
3619
  def CheckArguments(self):
3620
    """Check the arguments.
3621

3622
    """
3623
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3624
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
3625

    
3626
  def ExpandNames(self):
3627
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3628
                                   constants.INSTANCE_REBOOT_HARD,
3629
                                   constants.INSTANCE_REBOOT_FULL]:
3630
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3631
                                  (constants.INSTANCE_REBOOT_SOFT,
3632
                                   constants.INSTANCE_REBOOT_HARD,
3633
                                   constants.INSTANCE_REBOOT_FULL))
3634
    self._ExpandAndLockInstance()
3635

    
3636
  def BuildHooksEnv(self):
3637
    """Build hooks env.
3638

3639
    This runs on master, primary and secondary nodes of the instance.
3640

3641
    """
3642
    env = {
3643
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3644
      "REBOOT_TYPE": self.op.reboot_type,
3645
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3646
      }
3647
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3648
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3649
    return env, nl, nl
3650

    
3651
  def CheckPrereq(self):
3652
    """Check prerequisites.
3653

3654
    This checks that the instance is in the cluster.
3655

3656
    """
3657
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3658
    assert self.instance is not None, \
3659
      "Cannot retrieve locked instance %s" % self.op.instance_name
3660

    
3661
    _CheckNodeOnline(self, instance.primary_node)
3662

    
3663
    # check bridges existence
3664
    _CheckInstanceBridgesExist(self, instance)
3665

    
3666
  def Exec(self, feedback_fn):
3667
    """Reboot the instance.
3668

3669
    """
3670
    instance = self.instance
3671
    ignore_secondaries = self.op.ignore_secondaries
3672
    reboot_type = self.op.reboot_type
3673

    
3674
    node_current = instance.primary_node
3675

    
3676
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3677
                       constants.INSTANCE_REBOOT_HARD]:
3678
      for disk in instance.disks:
3679
        self.cfg.SetDiskID(disk, node_current)
3680
      result = self.rpc.call_instance_reboot(node_current, instance,
3681
                                             reboot_type,
3682
                                             self.shutdown_timeout)
3683
      result.Raise("Could not reboot instance")
3684
    else:
3685
      result = self.rpc.call_instance_shutdown(node_current, instance,
3686
                                               self.shutdown_timeout)
3687
      result.Raise("Could not shutdown instance for full reboot")
3688
      _ShutdownInstanceDisks(self, instance)
3689
      _StartInstanceDisks(self, instance, ignore_secondaries)
3690
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3691
      msg = result.fail_msg
3692
      if msg:
3693
        _ShutdownInstanceDisks(self, instance)
3694
        raise errors.OpExecError("Could not start instance for"
3695
                                 " full reboot: %s" % msg)
3696

    
3697
    self.cfg.MarkInstanceUp(instance.name)
3698

    
3699

    
3700
class LUShutdownInstance(LogicalUnit):
3701
  """Shutdown an instance.
3702

3703
  """
3704
  HPATH = "instance-stop"
3705
  HTYPE = constants.HTYPE_INSTANCE
3706
  _OP_REQP = ["instance_name"]
3707
  REQ_BGL = False
3708

    
3709
  def CheckArguments(self):
3710
    """Check the arguments.
3711

3712
    """
3713
    self.timeout = getattr(self.op, "timeout",
3714
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
3715

    
3716
  def ExpandNames(self):
3717
    self._ExpandAndLockInstance()
3718

    
3719
  def BuildHooksEnv(self):
3720
    """Build hooks env.
3721

3722
    This runs on master, primary and secondary nodes of the instance.
3723

3724
    """
3725
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3726
    env["TIMEOUT"] = self.timeout
3727
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3728
    return env, nl, nl
3729

    
3730
  def CheckPrereq(self):
3731
    """Check prerequisites.
3732

3733
    This checks that the instance is in the cluster.
3734

3735
    """
3736
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3737
    assert self.instance is not None, \
3738
      "Cannot retrieve locked instance %s" % self.op.instance_name
3739
    _CheckNodeOnline(self, self.instance.primary_node)
3740

    
3741
  def Exec(self, feedback_fn):
3742
    """Shutdown the instance.
3743

3744
    """
3745
    instance = self.instance
3746
    node_current = instance.primary_node
3747
    timeout = self.timeout
3748
    self.cfg.MarkInstanceDown(instance.name)
3749
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
3750
    msg = result.fail_msg
3751
    if msg:
3752
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3753

    
3754
    _ShutdownInstanceDisks(self, instance)
3755

    
3756

    
3757
class LUReinstallInstance(LogicalUnit):
3758
  """Reinstall an instance.
3759

3760
  """
3761
  HPATH = "instance-reinstall"
3762
  HTYPE = constants.HTYPE_INSTANCE
3763
  _OP_REQP = ["instance_name"]
3764
  REQ_BGL = False
3765

    
3766
  def ExpandNames(self):
3767
    self._ExpandAndLockInstance()
3768

    
3769
  def BuildHooksEnv(self):
3770
    """Build hooks env.
3771

3772
    This runs on master, primary and secondary nodes of the instance.
3773

3774
    """
3775
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3776
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3777
    return env, nl, nl
3778

    
3779
  def CheckPrereq(self):
3780
    """Check prerequisites.
3781

3782
    This checks that the instance is in the cluster and is not running.
3783

3784
    """
3785
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3786
    assert instance is not None, \
3787
      "Cannot retrieve locked instance %s" % self.op.instance_name
3788
    _CheckNodeOnline(self, instance.primary_node)
3789

    
3790
    if instance.disk_template == constants.DT_DISKLESS:
3791
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3792
                                 self.op.instance_name)
3793
    if instance.admin_up:
3794
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3795
                                 self.op.instance_name)
3796
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3797
                                              instance.name,
3798
                                              instance.hypervisor)
3799
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3800
                      prereq=True)
3801
    if remote_info.payload:
3802
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3803
                                 (self.op.instance_name,
3804
                                  instance.primary_node))
3805

    
3806
    self.op.os_type = getattr(self.op, "os_type", None)
3807
    self.op.force_variant = getattr(self.op, "force_variant", False)
3808
    if self.op.os_type is not None:
3809
      # OS verification
3810
      pnode = self.cfg.GetNodeInfo(
3811
        self.cfg.ExpandNodeName(instance.primary_node))
3812
      if pnode is None:
3813
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3814
                                   instance.primary_node)
3815
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3816
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3817
                   (self.op.os_type, pnode.name), prereq=True)
3818
      if not self.op.force_variant:
3819
        _CheckOSVariant(result.payload, self.op.os_type)
3820

    
3821
    self.instance = instance
3822

    
3823
  def Exec(self, feedback_fn):
3824
    """Reinstall the instance.
3825

3826
    """
3827
    inst = self.instance
3828

    
3829
    if self.op.os_type is not None:
3830
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3831
      inst.os = self.op.os_type
3832
      self.cfg.Update(inst, feedback_fn)
3833

    
3834
    _StartInstanceDisks(self, inst, None)
3835
    try:
3836
      feedback_fn("Running the instance OS create scripts...")
3837
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3838
      result.Raise("Could not install OS for instance %s on node %s" %
3839
                   (inst.name, inst.primary_node))
3840
    finally:
3841
      _ShutdownInstanceDisks(self, inst)
3842

    
3843

    
3844
class LURecreateInstanceDisks(LogicalUnit):
3845
  """Recreate an instance's missing disks.
3846

3847
  """
3848
  HPATH = "instance-recreate-disks"
3849
  HTYPE = constants.HTYPE_INSTANCE
3850
  _OP_REQP = ["instance_name", "disks"]
3851
  REQ_BGL = False
3852

    
3853
  def CheckArguments(self):
3854
    """Check the arguments.
3855

3856
    """
3857
    if not isinstance(self.op.disks, list):
3858
      raise errors.OpPrereqError("Invalid disks parameter")
3859
    for item in self.op.disks:
3860
      if (not isinstance(item, int) or
3861
          item < 0):
3862
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3863
                                   str(item))
3864

    
3865
  def ExpandNames(self):
3866
    self._ExpandAndLockInstance()
3867

    
3868
  def BuildHooksEnv(self):
3869
    """Build hooks env.
3870

3871
    This runs on master, primary and secondary nodes of the instance.
3872

3873
    """
3874
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3875
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3876
    return env, nl, nl
3877

    
3878
  def CheckPrereq(self):
3879
    """Check prerequisites.
3880

3881
    This checks that the instance is in the cluster and is not running.
3882

3883
    """
3884
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3885
    assert instance is not None, \
3886
      "Cannot retrieve locked instance %s" % self.op.instance_name
3887
    _CheckNodeOnline(self, instance.primary_node)
3888

    
3889
    if instance.disk_template == constants.DT_DISKLESS:
3890
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3891
                                 self.op.instance_name)
3892
    if instance.admin_up:
3893
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3894
                                 self.op.instance_name)
3895
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3896
                                              instance.name,
3897
                                              instance.hypervisor)
3898
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3899
                      prereq=True)
3900
    if remote_info.payload:
3901
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3902
                                 (self.op.instance_name,
3903
                                  instance.primary_node))
3904

    
3905
    if not self.op.disks:
3906
      self.op.disks = range(len(instance.disks))
3907
    else:
3908
      for idx in self.op.disks:
3909
        if idx >= len(instance.disks):
3910
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
3911

    
3912
    self.instance = instance
3913

    
3914
  def Exec(self, feedback_fn):
3915
    """Recreate the disks.
3916

3917
    """
3918
    to_skip = []
3919
    for idx, disk in enumerate(self.instance.disks):
3920
      if idx not in self.op.disks: # disk idx has not been passed in
3921
        to_skip.append(idx)
3922
        continue
3923

    
3924
    _CreateDisks(self, self.instance, to_skip=to_skip)
3925

    
3926

    
3927
class LURenameInstance(LogicalUnit):
3928
  """Rename an instance.
3929

3930
  """
3931
  HPATH = "instance-rename"
3932
  HTYPE = constants.HTYPE_INSTANCE
3933
  _OP_REQP = ["instance_name", "new_name"]
3934

    
3935
  def BuildHooksEnv(self):
3936
    """Build hooks env.
3937

3938
    This runs on master, primary and secondary nodes of the instance.
3939

3940
    """
3941
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3942
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3943
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3944
    return env, nl, nl
3945

    
3946
  def CheckPrereq(self):
3947
    """Check prerequisites.
3948

3949
    This checks that the instance is in the cluster and is not running.
3950

3951
    """
3952
    instance = self.cfg.GetInstanceInfo(
3953
      self.cfg.ExpandInstanceName(self.op.instance_name))
3954
    if instance is None:
3955
      raise errors.OpPrereqError("Instance '%s' not known" %
3956
                                 self.op.instance_name)
3957
    _CheckNodeOnline(self, instance.primary_node)
3958

    
3959
    if instance.admin_up:
3960
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3961
                                 self.op.instance_name)
3962
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3963
                                              instance.name,
3964
                                              instance.hypervisor)
3965
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3966
                      prereq=True)
3967
    if remote_info.payload:
3968
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3969
                                 (self.op.instance_name,
3970
                                  instance.primary_node))
3971
    self.instance = instance
3972

    
3973
    # new name verification
3974
    name_info = utils.HostInfo(self.op.new_name)
3975

    
3976
    self.op.new_name = new_name = name_info.name
3977
    instance_list = self.cfg.GetInstanceList()
3978
    if new_name in instance_list:
3979
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3980
                                 new_name)
3981

    
3982
    if not getattr(self.op, "ignore_ip", False):
3983
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3984
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3985
                                   (name_info.ip, new_name))
3986

    
3987

    
3988
  def Exec(self, feedback_fn):
3989
    """Reinstall the instance.
3990

3991
    """
3992
    inst = self.instance
3993
    old_name = inst.name
3994

    
3995
    if inst.disk_template == constants.DT_FILE:
3996
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3997

    
3998
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3999
    # Change the instance lock. This is definitely safe while we hold the BGL
4000
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4001
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4002

    
4003
    # re-read the instance from the configuration after rename
4004
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4005

    
4006
    if inst.disk_template == constants.DT_FILE:
4007
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4008
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4009
                                                     old_file_storage_dir,
4010
                                                     new_file_storage_dir)
4011
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4012
                   " (but the instance has been renamed in Ganeti)" %
4013
                   (inst.primary_node, old_file_storage_dir,
4014
                    new_file_storage_dir))
4015

    
4016
    _StartInstanceDisks(self, inst, None)
4017
    try:
4018
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4019
                                                 old_name)
4020
      msg = result.fail_msg
4021
      if msg:
4022
        msg = ("Could not run OS rename script for instance %s on node %s"
4023
               " (but the instance has been renamed in Ganeti): %s" %
4024
               (inst.name, inst.primary_node, msg))
4025
        self.proc.LogWarning(msg)
4026
    finally:
4027
      _ShutdownInstanceDisks(self, inst)
4028

    
4029

    
4030
class LURemoveInstance(LogicalUnit):
4031
  """Remove an instance.
4032

4033
  """
4034
  HPATH = "instance-remove"
4035
  HTYPE = constants.HTYPE_INSTANCE
4036
  _OP_REQP = ["instance_name", "ignore_failures"]
4037
  REQ_BGL = False
4038

    
4039
  def CheckArguments(self):
4040
    """Check the arguments.
4041

4042
    """
4043
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4044
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4045

    
4046
  def ExpandNames(self):
4047
    self._ExpandAndLockInstance()
4048
    self.needed_locks[locking.LEVEL_NODE] = []
4049
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4050

    
4051
  def DeclareLocks(self, level):
4052
    if level == locking.LEVEL_NODE:
4053
      self._LockInstancesNodes()
4054

    
4055
  def BuildHooksEnv(self):
4056
    """Build hooks env.
4057

4058
    This runs on master, primary and secondary nodes of the instance.
4059

4060
    """
4061
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4062
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4063
    nl = [self.cfg.GetMasterNode()]
4064
    return env, nl, nl
4065

    
4066
  def CheckPrereq(self):
4067
    """Check prerequisites.
4068

4069
    This checks that the instance is in the cluster.
4070

4071
    """
4072
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4073
    assert self.instance is not None, \
4074
      "Cannot retrieve locked instance %s" % self.op.instance_name
4075

    
4076
  def Exec(self, feedback_fn):
4077
    """Remove the instance.
4078

4079
    """
4080
    instance = self.instance
4081
    logging.info("Shutting down instance %s on node %s",
4082
                 instance.name, instance.primary_node)
4083

    
4084
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4085
                                             self.shutdown_timeout)
4086
    msg = result.fail_msg
4087
    if msg:
4088
      if self.op.ignore_failures:
4089
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4090
      else:
4091
        raise errors.OpExecError("Could not shutdown instance %s on"
4092
                                 " node %s: %s" %
4093
                                 (instance.name, instance.primary_node, msg))
4094

    
4095
    logging.info("Removing block devices for instance %s", instance.name)
4096

    
4097
    if not _RemoveDisks(self, instance):
4098
      if self.op.ignore_failures:
4099
        feedback_fn("Warning: can't remove instance's disks")
4100
      else:
4101
        raise errors.OpExecError("Can't remove instance's disks")
4102

    
4103
    logging.info("Removing instance %s out of cluster config", instance.name)
4104

    
4105
    self.cfg.RemoveInstance(instance.name)
4106
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4107

    
4108

    
4109
class LUQueryInstances(NoHooksLU):
4110
  """Logical unit for querying instances.
4111

4112
  """
4113
  _OP_REQP = ["output_fields", "names", "use_locking"]
4114
  REQ_BGL = False
4115
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4116
                    "serial_no", "ctime", "mtime", "uuid"]
4117
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4118
                                    "admin_state",
4119
                                    "disk_template", "ip", "mac", "bridge",
4120
                                    "nic_mode", "nic_link",
4121
                                    "sda_size", "sdb_size", "vcpus", "tags",
4122
                                    "network_port", "beparams",
4123
                                    r"(disk)\.(size)/([0-9]+)",
4124
                                    r"(disk)\.(sizes)", "disk_usage",
4125
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4126
                                    r"(nic)\.(bridge)/([0-9]+)",
4127
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4128
                                    r"(disk|nic)\.(count)",
4129
                                    "hvparams",
4130
                                    ] + _SIMPLE_FIELDS +
4131
                                  ["hv/%s" % name
4132
                                   for name in constants.HVS_PARAMETERS] +
4133
                                  ["be/%s" % name
4134
                                   for name in constants.BES_PARAMETERS])
4135
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4136
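  # The dynamic fields above ("oper_state", "oper_ram", "status") need live
  # data from the nodes; requesting any of them drives the do_node_query /
  # do_locking logic in ExpandNames below.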

    
4137

    
4138
  def ExpandNames(self):
4139
    _CheckOutputFields(static=self._FIELDS_STATIC,
4140
                       dynamic=self._FIELDS_DYNAMIC,
4141
                       selected=self.op.output_fields)
4142

    
4143
    self.needed_locks = {}
4144
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4145
    self.share_locks[locking.LEVEL_NODE] = 1
4146

    
4147
    if self.op.names:
4148
      self.wanted = _GetWantedInstances(self, self.op.names)
4149
    else:
4150
      self.wanted = locking.ALL_SET
4151

    
4152
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4153
    self.do_locking = self.do_node_query and self.op.use_locking
4154
    if self.do_locking:
4155
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4156
      self.needed_locks[locking.LEVEL_NODE] = []
4157
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4158

    
4159
  def DeclareLocks(self, level):
4160
    if level == locking.LEVEL_NODE and self.do_locking:
4161
      self._LockInstancesNodes()
4162

    
4163
  def CheckPrereq(self):
4164
    """Check prerequisites.
4165

4166
    """
4167
    pass
4168

    
4169
  def Exec(self, feedback_fn):
4170
    """Computes the list of nodes and their attributes.
4171

4172
    """
4173
    all_info = self.cfg.GetAllInstancesInfo()
4174
    if self.wanted == locking.ALL_SET:
4175
      # caller didn't specify instance names, so ordering is not important
4176
      if self.do_locking:
4177
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4178
      else:
4179
        instance_names = all_info.keys()
4180
      instance_names = utils.NiceSort(instance_names)
4181
    else:
4182
      # caller did specify names, so we must keep the ordering
4183
      if self.do_locking:
4184
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4185
      else:
4186
        tgt_set = all_info.keys()
4187
      missing = set(self.wanted).difference(tgt_set)
4188
      if missing:
4189
        raise errors.OpExecError("Some instances were removed before"
4190
                                 " retrieving their data: %s" % missing)
4191
      instance_names = self.wanted
4192

    
4193
    instance_list = [all_info[iname] for iname in instance_names]
4194

    
4195
    # begin data gathering
4196

    
4197
    nodes = frozenset([inst.primary_node for inst in instance_list])
4198
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4199

    
4200
    bad_nodes = []
4201
    off_nodes = []
4202
    if self.do_node_query:
4203
      live_data = {}
4204
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4205
      for name in nodes:
4206
        result = node_data[name]
4207
        if result.offline:
4208
          # offline nodes will be in both lists
4209
          off_nodes.append(name)
4210
        if result.fail_msg:
4211
          bad_nodes.append(name)
4212
        else:
4213
          if result.payload:
4214
            live_data.update(result.payload)
4215
          # else no instance is alive
4216
    else:
4217
      live_data = dict([(name, {}) for name in instance_names])
4218

    
4219
    # end data gathering
4220

    
4221
    HVPREFIX = "hv/"
4222
    BEPREFIX = "be/"
4223
    output = []
4224
    cluster = self.cfg.GetClusterInfo()
4225
    for instance in instance_list:
4226
      iout = []
4227
      i_hv = cluster.FillHV(instance)
4228
      i_be = cluster.FillBE(instance)
4229
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4230
                                 nic.nicparams) for nic in instance.nics]
4231
      for field in self.op.output_fields:
4232
        st_match = self._FIELDS_STATIC.Matches(field)
4233
        if field in self._SIMPLE_FIELDS:
4234
          val = getattr(instance, field)
4235
        elif field == "pnode":
4236
          val = instance.primary_node
4237
        elif field == "snodes":
4238
          val = list(instance.secondary_nodes)
4239
        elif field == "admin_state":
4240
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
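
    # Illustrative output shape for the loop above: one row per instance,
    # one value per requested field, in the order the fields were given.
    # For a field list like ["status", "vcpus", "ip"], a live, admin-up
    # instance might yield ["running", 1, "192.0.2.10"] (invented values);
    # "ERROR_up"/"ERROR_down"/"ADMIN_down" encode mismatches between the
    # admin state and the live state, while "ERROR_nodedown" and
    # "ERROR_nodeoffline" flag an unreachable or offline primary node.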


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
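
  # In short, the failover above is: optionally verify DRBD consistency,
  # shut the instance down on the current primary, deactivate its disks
  # there, flip instance.primary_node in the configuration and, if the
  # instance was marked up, reassemble the disks and start it on the former
  # secondary. No data is copied, which is why only network-mirrored disk
  # templates are accepted in CheckPrereq.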


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return env, nl, nl


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
    if target_node is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                 self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node))

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
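
  # The data transfer above works disk by disk: the freshly created disk is
  # assembled on the target node to obtain a device path, and the source
  # node is then asked via call_blockdev_export to stream its data to that
  # path (the cluster name is passed along to the export call). Any failure
  # aborts the move and the partially created target disks are removed.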


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
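
  # Node evacuation via this LU is simply a batch of per-instance
  # migrations: ExpandNames builds one TLMigrateInstance tasklet (live flag
  # taken from the opcode, cleanup=False) for every instance whose primary
  # node is the node being emptied, and locks all of those instances in
  # addition to the node itself.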


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
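
  # Rough usage sketch, mirroring what LUMigrateInstance and LUMigrateNode
  # do above ("inst1.example.com" is a made-up name):
  #
  #   tasklet = TLMigrateInstance(lu, "inst1.example.com", True, False)
  #   lu.tasklets = [tasklet]
  #
  # The LU machinery then drives CheckPrereq/Exec; with cleanup=True the
  # same tasklet only repairs the state left behind by a previously failed
  # migration instead of starting a new one.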


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate unique logical volume names, one for each of the
  given extensions.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results
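
# For example (with hypothetical IDs returned by GenerateUniqueID),
# _GenerateUniqueNames(lu, [".disk0", ".disk1"]) yields two names such as
# ["d2eb42d6-....disk0", "3f6a09c1-....disk1"]; note that each extension
# gets its own freshly generated ID as prefix.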


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
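
# The returned object is a small device tree: an LD_DRBD8 disk whose
# logical_id packs (primary, secondary, port, p_minor, s_minor, secret) and
# whose two LD_LV children are the data volume (the full 'size') and a
# fixed 128 MiB metadata volume, named after names[0] and names[1] in the
# cluster's volume group.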


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
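
# Naming sketch for the generated layout (ID prefixes are illustrative):
# a plain-LVM instance gets one LV per disk ("<id>.disk0", "<id>.disk1", ...);
# the DRBD8 template derives "<id>.diskN_data" and "<id>.diskN_meta" pairs
# and additionally consumes a port plus a DRBD minor on each of the two
# nodes per disk; file-based disks become "<file_storage_dir>/diskN". In
# all cases the iv_name seen by the rest of the code is "disk/0", "disk/1",
# and so on.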


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
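
# E.g. an instance named "web1.example.com" (hypothetical) gets the text
# "originstname+web1.example.com" attached to its block devices, which for
# LVM-backed disks ends up as an LVM tag.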


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
5411

    
5412

    
5413
def _ComputeDiskSize(disk_template, disks):
5414
  """Compute disk size requirements in the volume group
5415

5416
  """
5417
  # Required free disk space as a function of disk and swap space
5418
  req_size_dict = {
5419
    constants.DT_DISKLESS: None,
5420
    constants.DT_PLAIN: sum(d["size"] for d in disks),
5421
    # 128 MB are added for drbd metadata for each disk
5422
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5423
    constants.DT_FILE: None,
5424
  }
5425

    
5426
  if disk_template not in req_size_dict:
5427
    raise errors.ProgrammerError("Disk template '%s' size requirement"
5428
                                 " is unknown" %  disk_template)
5429

    
5430
  return req_size_dict[disk_template]
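
# Worked example: for two disks of 10240 and 2048 MiB,
# _ComputeDiskSize(constants.DT_DRBD8, [{"size": 10240}, {"size": 2048}])
# returns (10240 + 128) + (2048 + 128) = 12544 MiB of volume group space
# needed per node, the same disks under DT_PLAIN need 12288 MiB, and the
# diskless and file templates need no volume group space at all (None).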


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
        else:
          # or validate/reserve the current one
          if self.cfg.IsMacInUse(mac):
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % mac)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")
      self.op.force_variant = getattr(self.op, "force_variant", False)
5681

    
5682
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

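      # The export info is an INI-style config; only the keys read below are
      # relied upon: 'version' and 'os' from the constants.INISECT_EXP section
      # and 'disk_count', 'name', 'nic_count', 'nic<N>_mac', 'disk<N>_dump'
      # from the constants.INISECT_INS section.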
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

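    # req_size is the total amount of LVM space (in MB) needed on each of the
    # involved nodes for this disk template; for templates that do not
    # allocate from the node's volume group it is presumably None, which is
    # why the check below is conditional.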
    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)
    if not self.op.force_variant:
      _CheckOSVariant(result.payload, self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))

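    # file_storage_dir now looks like
    # <cluster file storage dir>/<optional subdirectory>/<instance name>;
    # it is presumably only used by file-based disk templates.
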
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

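    # For imports the lock on the source node is kept on purpose: it is still
    # needed for the OS import RPC further down. Also note that when
    # wait_for_sync was not requested, mirrored (DRBD) disks still get a short
    # grace period and a one-shot degradation check below before we continue.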
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

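  # This LU only handles opcode defaults, locking and hooks; the actual disk
  # replacement logic lives in the TLReplaceDisks tasklet, which ExpandNames
  # registers in self.tasklets.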
  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

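    # One TLReplaceDisks tasklet (in REPLACE_DISK_CHG mode) has been created
    # per instance that uses this node as its secondary; running them all is
    # what actually evacuates the node.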
    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
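  # Parameter rules (enforced in CheckArguments below): REPLACE_DISK_CHG needs
  # exactly one of remote_node/iallocator to pick the new secondary, while the
  # other modes (REPLACE_DISK_PRI/SEC/AUTO, see CheckPrereq) accept neither.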
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node")

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info))

    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes))

    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced")

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

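    # At this point target_node is the node whose storage will be rebuilt,
    # other_node is the peer used for the consistency checks, and new_node
    # (only set in REPLACE_DISK_CHG mode) is the replacement secondary.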
    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (", ".join([str(i) for i in self.disks]), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

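      # lv_data matches the size of the disk being rebuilt; lv_meta (a fixed
      # 128, presumably MB) is the small volume DRBD8 uses for its metadata.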
      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (dev, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

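      # For example, an old data LV with physical_id (<vg>, "<uuid>.disk0_data")
      # becomes (<vg>, "<uuid>.disk0_data_replaced-<time_t>"), freeing its name
      # for the freshly created replacement LV (the names are illustrative,
      # following the ".disk%d_data"/"_meta" scheme used in _CreateNewStorage).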
      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r" % (minors,))

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

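      # The DRBD8 logical_id layout is (node_a, node_b, port, minor_a,
      # minor_b, secret), as unpacked above; new_alone_id carries port=None so
      # the device first comes up standalone on the new node, while new_net_id
      # is kept in iv_names and applied when the network attach is done below.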
      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = node_name

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                node_name, True):
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type)

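    # Repairing storage on this node is only safe while every affected
    # instance still has healthy disks on its other nodes, which is why the
    # node being repaired is discarded from the set checked below.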
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)


    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node)
      vg_free = info.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node, vg_free, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
7020
      self.cfg.SetDiskID(disk, node)
7021
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7022
      result.Raise("Grow request failed to node %s" % node)
7023
    disk.RecordGrow(self.op.amount)
7024
    self.cfg.Update(instance, feedback_fn)
7025
    if self.op.wait_for_sync:
7026
      disk_abort = not _WaitForSync(self, instance)
7027
      if disk_abort:
7028
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7029
                             " status.\nPlease check the instance.")
class LUQueryInstanceData(NoHooksLU):
7033
  """Query runtime instance data.
7034

7035
  """
7036
  _OP_REQP = ["instances", "static"]
7037
  REQ_BGL = False
7038

    
7039
  def ExpandNames(self):
7040
    self.needed_locks = {}
7041
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7042

    
7043
    if not isinstance(self.op.instances, list):
7044
      raise errors.OpPrereqError("Invalid argument type 'instances'")
7045

    
7046
    if self.op.instances:
7047
      self.wanted_names = []
7048
      for name in self.op.instances:
7049
        full_name = self.cfg.ExpandInstanceName(name)
7050
        if full_name is None:
7051
          raise errors.OpPrereqError("Instance '%s' not known" % name)
7052
        self.wanted_names.append(full_name)
7053
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7054
    else:
7055
      self.wanted_names = None
7056
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7057

    
7058
    self.needed_locks[locking.LEVEL_NODE] = []
7059
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7060

    
7061
  def DeclareLocks(self, level):
7062
    if level == locking.LEVEL_NODE:
7063
      self._LockInstancesNodes()
7064

    
7065
  def CheckPrereq(self):
7066
    """Check prerequisites.
7067

7068
    This only checks the optional instance list against the existing names.
7069

7070
    """
7071
    if self.wanted_names is None:
7072
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7073

    
7074
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7075
                             in self.wanted_names]
7076
    return
7077

    
7078
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
7079
    """Returns the status of a block device
7080

7081
    """
7082
    if self.op.static or not node:
7083
      return None
7084

    
7085
    self.cfg.SetDiskID(dev, node)
7086

    
7087
    result = self.rpc.call_blockdev_find(node, dev)
7088
    if result.offline:
7089
      return None
7090

    
7091
    result.Raise("Can't compute disk status for %s" % instance_name)
7092

    
7093
    status = result.payload
7094
    if status is None:
7095
      return None
7096

    
7097
    return (status.dev_path, status.major, status.minor,
7098
            status.sync_percent, status.estimated_time,
7099
            status.is_degraded, status.ldisk_status)
7100

    
7101
  def _ComputeDiskStatus(self, instance, snode, dev):
7102
    """Compute block device status.
7103

7104
    """
7105
    if dev.dev_type in constants.LDS_DRBD:
7106
      # we change the snode then (otherwise we use the one passed in)
7107
      if dev.logical_id[0] == instance.primary_node:
7108
        snode = dev.logical_id[1]
7109
      else:
7110
        snode = dev.logical_id[0]
7111

    
7112
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7113
                                              instance.name, dev)
7114
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7115

    
7116
    if dev.children:
7117
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
7118
                      for child in dev.children]
7119
    else:
7120
      dev_children = []
7121

    
7122
    data = {
7123
      "iv_name": dev.iv_name,
7124
      "dev_type": dev.dev_type,
7125
      "logical_id": dev.logical_id,
7126
      "physical_id": dev.physical_id,
7127
      "pstatus": dev_pstatus,
7128
      "sstatus": dev_sstatus,
7129
      "children": dev_children,
7130
      "mode": dev.mode,
7131
      "size": dev.size,
7132
      }
7133

    
7134
    return data
7135

    
7136
  def Exec(self, feedback_fn):
7137
    """Gather and return data"""
7138
    result = {}
7139

    
7140
    cluster = self.cfg.GetClusterInfo()
7141

    
7142
    for instance in self.wanted_instances:
7143
      if not self.op.static:
7144
        remote_info = self.rpc.call_instance_info(instance.primary_node,
7145
                                                  instance.name,
7146
                                                  instance.hypervisor)
7147
        remote_info.Raise("Error checking node %s" % instance.primary_node)
7148
        remote_info = remote_info.payload
7149
        if remote_info and "state" in remote_info:
7150
          remote_state = "up"
7151
        else:
7152
          remote_state = "down"
7153
      else:
7154
        remote_state = None
7155
      if instance.admin_up:
7156
        config_state = "up"
7157
      else:
7158
        config_state = "down"
7159

    
7160
      disks = [self._ComputeDiskStatus(instance, None, device)
7161
               for device in instance.disks]
7162

    
7163
      idict = {
7164
        "name": instance.name,
7165
        "config_state": config_state,
7166
        "run_state": remote_state,
7167
        "pnode": instance.primary_node,
7168
        "snodes": instance.secondary_nodes,
7169
        "os": instance.os,
7170
        # this happens to be the same format used for hooks
7171
        "nics": _NICListToTuple(self, instance.nics),
7172
        "disks": disks,
7173
        "hypervisor": instance.hypervisor,
7174
        "network_port": instance.network_port,
7175
        "hv_instance": instance.hvparams,
7176
        "hv_actual": cluster.FillHV(instance),
7177
        "be_instance": instance.beparams,
7178
        "be_actual": cluster.FillBE(instance),
7179
        "serial_no": instance.serial_no,
7180
        "mtime": instance.mtime,
7181
        "ctime": instance.ctime,
7182
        "uuid": instance.uuid,
7183
        }
7184

    
7185
      result[instance.name] = idict
7186

    
7187
    return result
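# Illustrative sample of the mapping returned above (values invented, keys
# taken from the idict built in the loop):
#
#   {"inst1.example.com": {"name": "inst1.example.com", "config_state": "up",
#                          "run_state": "up", "pnode": "node1.example.com",
#                          "snodes": ["node2.example.com"],
#                          "disks": [...], "hypervisor": "xen-pvm", ...}}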
class LUSetInstanceParams(LogicalUnit):
7191
  """Modifies an instances's parameters.
7192

7193
  """
7194
  HPATH = "instance-modify"
7195
  HTYPE = constants.HTYPE_INSTANCE
7196
  _OP_REQP = ["instance_name"]
7197
  REQ_BGL = False
7198

    
7199
  def CheckArguments(self):
7200
    if not hasattr(self.op, 'nics'):
7201
      self.op.nics = []
7202
    if not hasattr(self.op, 'disks'):
7203
      self.op.disks = []
7204
    if not hasattr(self.op, 'beparams'):
7205
      self.op.beparams = {}
7206
    if not hasattr(self.op, 'hvparams'):
7207
      self.op.hvparams = {}
7208
    self.op.force = getattr(self.op, "force", False)
7209
    if not (self.op.nics or self.op.disks or
7210
            self.op.hvparams or self.op.beparams):
7211
      raise errors.OpPrereqError("No changes submitted")
7212

    
7213
    # Disk validation
7214
    disk_addremove = 0
7215
    for disk_op, disk_dict in self.op.disks:
7216
      if disk_op == constants.DDM_REMOVE:
7217
        disk_addremove += 1
7218
        continue
7219
      elif disk_op == constants.DDM_ADD:
7220
        disk_addremove += 1
7221
      else:
7222
        if not isinstance(disk_op, int):
7223
          raise errors.OpPrereqError("Invalid disk index")
7224
        if not isinstance(disk_dict, dict):
7225
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7226
          raise errors.OpPrereqError(msg)
7227

    
7228
      if disk_op == constants.DDM_ADD:
7229
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7230
        if mode not in constants.DISK_ACCESS_SET:
7231
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
7232
        size = disk_dict.get('size', None)
7233
        if size is None:
7234
          raise errors.OpPrereqError("Required disk parameter size missing")
7235
        try:
7236
          size = int(size)
7237
        except ValueError, err:
7238
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7239
                                     str(err))
7240
        disk_dict['size'] = size
7241
      else:
7242
        # modification of disk
7243
        if 'size' in disk_dict:
7244
          raise errors.OpPrereqError("Disk size change not possible, use"
7245
                                     " grow-disk")
7246

    
7247
    if disk_addremove > 1:
7248
      raise errors.OpPrereqError("Only one disk add or remove operation"
7249
                                 " supported at a time")
7250

    
7251
    # NIC validation
7252
    nic_addremove = 0
7253
    for nic_op, nic_dict in self.op.nics:
7254
      if nic_op == constants.DDM_REMOVE:
7255
        nic_addremove += 1
7256
        continue
7257
      elif nic_op == constants.DDM_ADD:
7258
        nic_addremove += 1
7259
      else:
7260
        if not isinstance(nic_op, int):
7261
          raise errors.OpPrereqError("Invalid nic index")
7262
        if not isinstance(nic_dict, dict):
7263
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7264
          raise errors.OpPrereqError(msg)
7265

    
7266
      # nic_dict should be a dict
7267
      nic_ip = nic_dict.get('ip', None)
7268
      if nic_ip is not None:
7269
        if nic_ip.lower() == constants.VALUE_NONE:
7270
          nic_dict['ip'] = None
7271
        else:
7272
          if not utils.IsValidIP(nic_ip):
7273
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
7274

    
7275
      nic_bridge = nic_dict.get('bridge', None)
7276
      nic_link = nic_dict.get('link', None)
7277
      if nic_bridge and nic_link:
7278
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7279
                                   " at the same time")
7280
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7281
        nic_dict['bridge'] = None
7282
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7283
        nic_dict['link'] = None
7284

    
7285
      if nic_op == constants.DDM_ADD:
7286
        nic_mac = nic_dict.get('mac', None)
7287
        if nic_mac is None:
7288
          nic_dict['mac'] = constants.VALUE_AUTO
7289

    
7290
      if 'mac' in nic_dict:
7291
        nic_mac = nic_dict['mac']
7292
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7293
          if not utils.IsValidMac(nic_mac):
7294
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
7295
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7296
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7297
                                     " modifying an existing nic")
7298

    
7299
    if nic_addremove > 1:
7300
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7301
                                 " supported at a time")
7302

    
7303
  def ExpandNames(self):
7304
    self._ExpandAndLockInstance()
7305
    self.needed_locks[locking.LEVEL_NODE] = []
7306
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7307

    
7308
  def DeclareLocks(self, level):
7309
    if level == locking.LEVEL_NODE:
7310
      self._LockInstancesNodes()
7311

    
7312
  def BuildHooksEnv(self):
7313
    """Build hooks env.
7314

7315
    This runs on the master, primary and secondaries.
7316

7317
    """
7318
    args = dict()
7319
    if constants.BE_MEMORY in self.be_new:
7320
      args['memory'] = self.be_new[constants.BE_MEMORY]
7321
    if constants.BE_VCPUS in self.be_new:
7322
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
7323
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7324
    # information at all.
7325
    if self.op.nics:
7326
      args['nics'] = []
7327
      nic_override = dict(self.op.nics)
7328
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7329
      for idx, nic in enumerate(self.instance.nics):
7330
        if idx in nic_override:
7331
          this_nic_override = nic_override[idx]
7332
        else:
7333
          this_nic_override = {}
7334
        if 'ip' in this_nic_override:
7335
          ip = this_nic_override['ip']
7336
        else:
7337
          ip = nic.ip
7338
        if 'mac' in this_nic_override:
7339
          mac = this_nic_override['mac']
7340
        else:
7341
          mac = nic.mac
7342
        if idx in self.nic_pnew:
7343
          nicparams = self.nic_pnew[idx]
7344
        else:
7345
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7346
        mode = nicparams[constants.NIC_MODE]
7347
        link = nicparams[constants.NIC_LINK]
7348
        args['nics'].append((ip, mac, mode, link))
7349
      if constants.DDM_ADD in nic_override:
7350
        ip = nic_override[constants.DDM_ADD].get('ip', None)
7351
        mac = nic_override[constants.DDM_ADD]['mac']
7352
        nicparams = self.nic_pnew[constants.DDM_ADD]
7353
        mode = nicparams[constants.NIC_MODE]
7354
        link = nicparams[constants.NIC_LINK]
7355
        args['nics'].append((ip, mac, mode, link))
7356
      elif constants.DDM_REMOVE in nic_override:
7357
        del args['nics'][-1]
7358

    
7359
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7360
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7361
    return env, nl, nl
7362

    
7363
  def _GetUpdatedParams(self, old_params, update_dict,
7364
                        default_values, parameter_types):
7365
    """Return the new params dict for the given params.
7366

7367
    @type old_params: dict
7368
    @param old_params: old parameters
7369
    @type update_dict: dict
7370
    @param update_dict: dict containing new parameter values,
7371
                        or constants.VALUE_DEFAULT to reset the
7372
                        parameter to its default value
7373
    @type default_values: dict
7374
    @param default_values: default values for the filled parameters
7375
    @type parameter_types: dict
7376
    @param parameter_types: dict mapping target dict keys to types
7377
                            in constants.ENFORCEABLE_TYPES
7378
    @rtype: (dict, dict)
7379
    @return: (new_parameters, filled_parameters)
7380

7381
    """
7382
    params_copy = copy.deepcopy(old_params)
7383
    for key, val in update_dict.iteritems():
7384
      if val == constants.VALUE_DEFAULT:
7385
        try:
7386
          del params_copy[key]
7387
        except KeyError:
7388
          pass
7389
      else:
7390
        params_copy[key] = val
7391
    utils.ForceDictType(params_copy, parameter_types)
7392
    params_filled = objects.FillDict(default_values, params_copy)
7393
    return (params_copy, params_filled)
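  # Worked example (illustrative only, ignoring ForceDictType coercion
  # details): with old_params={"memory": 512},
  # update_dict={"memory": constants.VALUE_DEFAULT, "vcpus": 4} and
  # default_values={"memory": 128, "vcpus": 1}, the method returns
  # ({"vcpus": 4}, {"memory": 128, "vcpus": 4}) -- the first dict is what is
  # stored per-instance, the second is the fully filled view.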
7394

    
7395
  def CheckPrereq(self):
7396
    """Check prerequisites.
7397

7398
    This only checks the instance list against the existing names.
7399

7400
    """
7401
    self.force = self.op.force
7402

    
7403
    # checking the new params on the primary/secondary nodes
7404

    
7405
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7406
    cluster = self.cluster = self.cfg.GetClusterInfo()
7407
    assert self.instance is not None, \
7408
      "Cannot retrieve locked instance %s" % self.op.instance_name
7409
    pnode = instance.primary_node
7410
    nodelist = list(instance.all_nodes)
7411

    
7412
    # hvparams processing
7413
    if self.op.hvparams:
7414
      i_hvdict, hv_new = self._GetUpdatedParams(
7415
                             instance.hvparams, self.op.hvparams,
7416
                             cluster.hvparams[instance.hypervisor],
7417
                             constants.HVS_PARAMETER_TYPES)
7418
      # local check
7419
      hypervisor.GetHypervisor(
7420
        instance.hypervisor).CheckParameterSyntax(hv_new)
7421
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7422
      self.hv_new = hv_new # the new actual values
7423
      self.hv_inst = i_hvdict # the new dict (without defaults)
7424
    else:
7425
      self.hv_new = self.hv_inst = {}
7426

    
7427
    # beparams processing
7428
    if self.op.beparams:
7429
      i_bedict, be_new = self._GetUpdatedParams(
7430
                             instance.beparams, self.op.beparams,
7431
                             cluster.beparams[constants.PP_DEFAULT],
7432
                             constants.BES_PARAMETER_TYPES)
7433
      self.be_new = be_new # the new actual values
7434
      self.be_inst = i_bedict # the new dict (without defaults)
7435
    else:
7436
      self.be_new = self.be_inst = {}
7437

    
7438
    self.warn = []
7439

    
7440
    if constants.BE_MEMORY in self.op.beparams and not self.force:
7441
      mem_check_list = [pnode]
7442
      if be_new[constants.BE_AUTO_BALANCE]:
7443
        # either we changed auto_balance to yes or it was from before
7444
        mem_check_list.extend(instance.secondary_nodes)
7445
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
7446
                                                  instance.hypervisor)
7447
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7448
                                         instance.hypervisor)
7449
      pninfo = nodeinfo[pnode]
7450
      msg = pninfo.fail_msg
7451
      if msg:
7452
        # Assume the primary node is unreachable and go ahead
7453
        self.warn.append("Can't get info from primary node %s: %s" %
7454
                         (pnode,  msg))
7455
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
7456
        self.warn.append("Node data from primary node %s doesn't contain"
7457
                         " free memory information" % pnode)
7458
      elif instance_info.fail_msg:
7459
        self.warn.append("Can't get instance runtime information: %s" %
7460
                        instance_info.fail_msg)
7461
      else:
7462
        if instance_info.payload:
7463
          current_mem = int(instance_info.payload['memory'])
7464
        else:
7465
          # Assume instance not running
7466
          # (there is a slight race condition here, but it's not very probable,
7467
          # and we have no other way to check)
7468
          current_mem = 0
7469
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7470
                    pninfo.payload['memory_free'])
7471
        if miss_mem > 0:
7472
          raise errors.OpPrereqError("This change will prevent the instance"
7473
                                     " from starting, due to %d MB of memory"
7474
                                     " missing on its primary node" % miss_mem)
7475

    
7476
      if be_new[constants.BE_AUTO_BALANCE]:
7477
        for node, nres in nodeinfo.items():
7478
          if node not in instance.secondary_nodes:
7479
            continue
7480
          msg = nres.fail_msg
7481
          if msg:
7482
            self.warn.append("Can't get info from secondary node %s: %s" %
7483
                             (node, msg))
7484
          elif not isinstance(nres.payload.get('memory_free', None), int):
7485
            self.warn.append("Secondary node %s didn't return free"
7486
                             " memory information" % node)
7487
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7488
            self.warn.append("Not enough memory to failover instance to"
7489
                             " secondary node %s" % node)
7490

    
7491
    # NIC processing
7492
    self.nic_pnew = {}
7493
    self.nic_pinst = {}
7494
    for nic_op, nic_dict in self.op.nics:
7495
      if nic_op == constants.DDM_REMOVE:
7496
        if not instance.nics:
7497
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
7498
        continue
7499
      if nic_op != constants.DDM_ADD:
7500
        # an existing nic
7501
        if nic_op < 0 or nic_op >= len(instance.nics):
7502
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7503
                                     " are 0 to %d" %
7504
                                     (nic_op, len(instance.nics)))
7505
        old_nic_params = instance.nics[nic_op].nicparams
7506
        old_nic_ip = instance.nics[nic_op].ip
7507
      else:
7508
        old_nic_params = {}
7509
        old_nic_ip = None
7510

    
7511
      update_params_dict = dict([(key, nic_dict[key])
7512
                                 for key in constants.NICS_PARAMETERS
7513
                                 if key in nic_dict])
7514

    
7515
      if 'bridge' in nic_dict:
7516
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7517

    
7518
      new_nic_params, new_filled_nic_params = \
7519
          self._GetUpdatedParams(old_nic_params, update_params_dict,
7520
                                 cluster.nicparams[constants.PP_DEFAULT],
7521
                                 constants.NICS_PARAMETER_TYPES)
7522
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7523
      self.nic_pinst[nic_op] = new_nic_params
7524
      self.nic_pnew[nic_op] = new_filled_nic_params
7525
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7526

    
7527
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
7528
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7529
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7530
        if msg:
7531
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7532
          if self.force:
7533
            self.warn.append(msg)
7534
          else:
7535
            raise errors.OpPrereqError(msg)
7536
      if new_nic_mode == constants.NIC_MODE_ROUTED:
7537
        if 'ip' in nic_dict:
7538
          nic_ip = nic_dict['ip']
7539
        else:
7540
          nic_ip = old_nic_ip
7541
        if nic_ip is None:
7542
          raise errors.OpPrereqError('Cannot set the nic ip to None'
7543
                                     ' on a routed nic')
7544
      if 'mac' in nic_dict:
7545
        nic_mac = nic_dict['mac']
7546
        if nic_mac is None:
7547
          raise errors.OpPrereqError('Cannot set the nic mac to None')
7548
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7549
          # otherwise generate the mac
7550
          nic_dict['mac'] = self.cfg.GenerateMAC()
7551
        else:
7552
          # or validate/reserve the current one
7553
          if self.cfg.IsMacInUse(nic_mac):
7554
            raise errors.OpPrereqError("MAC address %s already in use"
7555
                                       " in cluster" % nic_mac)
7556

    
7557
    # DISK processing
7558
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7559
      raise errors.OpPrereqError("Disk operations not supported for"
7560
                                 " diskless instances")
7561
    for disk_op, disk_dict in self.op.disks:
7562
      if disk_op == constants.DDM_REMOVE:
7563
        if len(instance.disks) == 1:
7564
          raise errors.OpPrereqError("Cannot remove the last disk of"
7565
                                     " an instance")
7566
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7567
        ins_l = ins_l[pnode]
7568
        msg = ins_l.fail_msg
7569
        if msg:
7570
          raise errors.OpPrereqError("Can't contact node %s: %s" %
7571
                                     (pnode, msg))
7572
        if instance.name in ins_l.payload:
7573
          raise errors.OpPrereqError("Instance is running, can't remove"
7574
                                     " disks.")
7575

    
7576
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
7580
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7581
        # an existing disk
7582
        if disk_op < 0 or disk_op >= len(instance.disks):
7583
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
7584
                                     " are 0 to %d" %
7585
                                     (disk_op, len(instance.disks)))
7586

    
7587
    return
7588

    
7589
  def Exec(self, feedback_fn):
7590
    """Modifies an instance.
7591

7592
    All parameters take effect only at the next restart of the instance.
7593

7594
    """
7595
    # Process here the warnings from CheckPrereq, as we don't have a
7596
    # feedback_fn there.
7597
    for warn in self.warn:
7598
      feedback_fn("WARNING: %s" % warn)
7599

    
7600
    result = []
7601
    instance = self.instance
7602
    cluster = self.cluster
7603
    # disk changes
7604
    for disk_op, disk_dict in self.op.disks:
7605
      if disk_op == constants.DDM_REMOVE:
7606
        # remove the last disk
7607
        device = instance.disks.pop()
7608
        device_idx = len(instance.disks)
7609
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7610
          self.cfg.SetDiskID(disk, node)
7611
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7612
          if msg:
7613
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7614
                            " continuing anyway", device_idx, node, msg)
7615
        result.append(("disk/%d" % device_idx, "remove"))
7616
      elif disk_op == constants.DDM_ADD:
7617
        # add a new disk
7618
        if instance.disk_template == constants.DT_FILE:
7619
          file_driver, file_path = instance.disks[0].logical_id
7620
          file_path = os.path.dirname(file_path)
7621
        else:
7622
          file_driver = file_path = None
7623
        disk_idx_base = len(instance.disks)
7624
        new_disk = _GenerateDiskTemplate(self,
7625
                                         instance.disk_template,
7626
                                         instance.name, instance.primary_node,
7627
                                         instance.secondary_nodes,
7628
                                         [disk_dict],
7629
                                         file_path,
7630
                                         file_driver,
7631
                                         disk_idx_base)[0]
7632
        instance.disks.append(new_disk)
7633
        info = _GetInstanceInfoText(instance)
7634

    
7635
        logging.info("Creating volume %s for instance %s",
7636
                     new_disk.iv_name, instance.name)
7637
        # Note: this needs to be kept in sync with _CreateDisks
7638
        #HARDCODE
7639
        for node in instance.all_nodes:
7640
          f_create = node == instance.primary_node
7641
          try:
7642
            _CreateBlockDev(self, node, instance, new_disk,
7643
                            f_create, info, f_create)
7644
          except errors.OpExecError, err:
7645
            self.LogWarning("Failed to create volume %s (%s) on"
7646
                            " node %s: %s",
7647
                            new_disk.iv_name, new_disk, node, err)
7648
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7649
                       (new_disk.size, new_disk.mode)))
7650
      else:
7651
        # change a given disk
7652
        instance.disks[disk_op].mode = disk_dict['mode']
7653
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7654
    # NIC changes
7655
    for nic_op, nic_dict in self.op.nics:
7656
      if nic_op == constants.DDM_REMOVE:
7657
        # remove the last nic
7658
        del instance.nics[-1]
7659
        result.append(("nic.%d" % len(instance.nics), "remove"))
7660
      elif nic_op == constants.DDM_ADD:
7661
        # mac and bridge should be set, by now
7662
        mac = nic_dict['mac']
7663
        ip = nic_dict.get('ip', None)
7664
        nicparams = self.nic_pinst[constants.DDM_ADD]
7665
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7666
        instance.nics.append(new_nic)
7667
        result.append(("nic.%d" % (len(instance.nics) - 1),
7668
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7669
                       (new_nic.mac, new_nic.ip,
7670
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7671
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7672
                       )))
7673
      else:
7674
        for key in 'mac', 'ip':
7675
          if key in nic_dict:
7676
            setattr(instance.nics[nic_op], key, nic_dict[key])
7677
        if nic_op in self.nic_pnew:
7678
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
7679
        for key, val in nic_dict.iteritems():
7680
          result.append(("nic.%s/%d" % (key, nic_op), val))
7681

    
7682
    # hvparams changes
7683
    if self.op.hvparams:
7684
      instance.hvparams = self.hv_inst
7685
      for key, val in self.op.hvparams.iteritems():
7686
        result.append(("hv/%s" % key, val))
7687

    
7688
    # beparams changes
7689
    if self.op.beparams:
7690
      instance.beparams = self.be_inst
7691
      for key, val in self.op.beparams.iteritems():
7692
        result.append(("be/%s" % key, val))
7693

    
7694
    self.cfg.Update(instance, feedback_fn)
7695

    
7696
    return result
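# Illustrative note: the list returned above is a sequence of
# (parameter, new value) pairs, for example (values invented)
# [("disk.mode/0", "ro"), ("nic.mac/0", "aa:00:00:11:22:33"),
#  ("be/memory", 1024)], which the caller uses to report what was changed.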


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
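# Illustrative sample of the mapping returned above (names invented):
#
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}   # False marks a node that failed to answer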
class LUExportInstance(LogicalUnit):
7742
  """Export an instance to an image in the cluster.
7743

7744
  """
7745
  HPATH = "instance-export"
7746
  HTYPE = constants.HTYPE_INSTANCE
7747
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
7748
  REQ_BGL = False
7749

    
7750
  def CheckArguments(self):
7751
    """Check the arguments.
7752

7753
    """
7754
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
7755
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
7756

    
7757
  def ExpandNames(self):
7758
    self._ExpandAndLockInstance()
7759
    # FIXME: lock only instance primary and destination node
7760
    #
7761
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
7764
    #  - making a tasklet to search (share-lock all), then create the new one,
7765
    #    then one to remove, after
7766
    #  - removing the removal operation altogether
7767
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7768

    
7769
  def DeclareLocks(self, level):
7770
    """Last minute lock declaration."""
7771
    # All nodes are locked anyway, so nothing to do here.
7772

    
7773
  def BuildHooksEnv(self):
7774
    """Build hooks env.
7775

7776
    This will run on the master, primary node and target node.
7777

7778
    """
7779
    env = {
7780
      "EXPORT_NODE": self.op.target_node,
7781
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
7782
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
7783
      }
7784
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7785
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
7786
          self.op.target_node]
7787
    return env, nl, nl
7788

    
7789
  def CheckPrereq(self):
7790
    """Check prerequisites.
7791

7792
    This checks that the instance and node names are valid.
7793

7794
    """
7795
    instance_name = self.op.instance_name
7796
    self.instance = self.cfg.GetInstanceInfo(instance_name)
7797
    assert self.instance is not None, \
7798
          "Cannot retrieve locked instance %s" % self.op.instance_name
7799
    _CheckNodeOnline(self, self.instance.primary_node)
7800

    
7801
    self.dst_node = self.cfg.GetNodeInfo(
7802
      self.cfg.ExpandNodeName(self.op.target_node))
7803

    
7804
    if self.dst_node is None:
7805
      # This is wrong node name, not a non-locked node
7806
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
7807
    _CheckNodeOnline(self, self.dst_node.name)
7808
    _CheckNodeNotDrained(self, self.dst_node.name)
7809

    
7810
    # instance disk type verification
7811
    for disk in self.instance.disks:
7812
      if disk.dev_type == constants.LD_FILE:
7813
        raise errors.OpPrereqError("Export not supported for instances with"
7814
                                   " file-based disks")
7815

    
7816
  def Exec(self, feedback_fn):
7817
    """Export an instance to an image in the cluster.
7818

7819
    """
7820
    instance = self.instance
7821
    dst_node = self.dst_node
7822
    src_node = instance.primary_node
7823

    
7824
    if self.op.shutdown:
7825
      # shutdown the instance, but not the disks
7826
      feedback_fn("Shutting down instance %s" % instance.name)
7827
      result = self.rpc.call_instance_shutdown(src_node, instance,
7828
                                               self.shutdown_timeout)
7829
      result.Raise("Could not shutdown instance %s on"
7830
                   " node %s" % (instance.name, src_node))
7831

    
7832
    vgname = self.cfg.GetVGName()
7833

    
7834
    snap_disks = []
7835

    
7836
    # set the disks ID correctly since call_instance_start needs the
7837
    # correct drbd minor to create the symlinks
7838
    for disk in instance.disks:
7839
      self.cfg.SetDiskID(disk, src_node)
7840

    
7841
    # per-disk results
7842
    dresults = []
7843
    try:
7844
      for idx, disk in enumerate(instance.disks):
7845
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
7846
                    (idx, src_node))
7847

    
7848
        # result.payload will be a snapshot of an lvm leaf of the one we passed
7849
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
7850
        msg = result.fail_msg
7851
        if msg:
7852
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
7853
                          idx, src_node, msg)
7854
          snap_disks.append(False)
7855
        else:
7856
          disk_id = (vgname, result.payload)
7857
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
7858
                                 logical_id=disk_id, physical_id=disk_id,
7859
                                 iv_name=disk.iv_name)
7860
          snap_disks.append(new_dev)
7861

    
7862
    finally:
7863
      if self.op.shutdown and instance.admin_up:
7864
        feedback_fn("Starting instance %s" % instance.name)
7865
        result = self.rpc.call_instance_start(src_node, instance, None, None)
7866
        msg = result.fail_msg
7867
        if msg:
7868
          _ShutdownInstanceDisks(self, instance)
7869
          raise errors.OpExecError("Could not start instance: %s" % msg)
7870

    
7871
    # TODO: check for size
7872

    
7873
    cluster_name = self.cfg.GetClusterName()
7874
    for idx, dev in enumerate(snap_disks):
7875
      feedback_fn("Exporting snapshot %s from %s to %s" %
7876
                  (idx, src_node, dst_node.name))
7877
      if dev:
7878
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
7879
                                               instance, cluster_name, idx)
7880
        msg = result.fail_msg
7881
        if msg:
7882
          self.LogWarning("Could not export disk/%s from node %s to"
7883
                          " node %s: %s", idx, src_node, dst_node.name, msg)
7884
          dresults.append(False)
7885
        else:
7886
          dresults.append(True)
7887
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
7888
        if msg:
7889
          self.LogWarning("Could not remove snapshot for disk/%d from node"
7890
                          " %s: %s", idx, src_node, msg)
7891
      else:
7892
        dresults.append(False)
7893

    
7894
    feedback_fn("Finalizing export on %s" % dst_node.name)
7895
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
7896
    fin_resu = True
7897
    msg = result.fail_msg
7898
    if msg:
7899
      self.LogWarning("Could not finalize export for instance %s"
7900
                      " on node %s: %s", instance.name, dst_node.name, msg)
7901
      fin_resu = False
7902

    
7903
    nodelist = self.cfg.GetNodeList()
7904
    nodelist.remove(dst_node.name)
7905

    
7906
    # on one-node clusters nodelist will be empty after the removal
7907
    # if we proceed the backup would be removed because OpQueryExports
7908
    # substitutes an empty list with the full cluster node list.
7909
    iname = instance.name
7910
    if nodelist:
7911
      feedback_fn("Removing old exports for instance %s" % iname)
7912
      exportlist = self.rpc.call_export_list(nodelist)
7913
      for node in exportlist:
7914
        if exportlist[node].fail_msg:
7915
          continue
7916
        if iname in exportlist[node].payload:
7917
          msg = self.rpc.call_export_remove(node, iname).fail_msg
7918
          if msg:
7919
            self.LogWarning("Could not remove older export for instance %s"
7920
                            " on node %s: %s", iname, node, msg)
7921
    return fin_resu, dresults
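# Illustrative note: the pair returned above is (finalize_ok, per_disk_ok),
# e.g. (True, [True, False]) for an export whose second disk failed; both
# parts are plain booleans, the exported data itself lives on the target node.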


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
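# Illustrative sample of the (path, tag) pairs returned above (tags invented):
#
#   [("/cluster", "production"),
#    ("/instances/inst1.example.com", "web"),
#    ("/nodes/node1.example.com", "rack-b4")]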


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target, feedback_fn)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target, feedback_fn)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
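# Illustrative sketch (assumed opcode and field names, not from the original
# source): a five-second delay on the master plus two nodes would be requested
# roughly as
#
#   op = opcodes.OpTestDelay(duration=5.0, on_master=True,
#                            on_nodes=["node1.example.com",
#                                      "node2.example.com"])
#
# which is mainly useful for exercising the job queue and locking code.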
class IAllocator(object):
8186
  """IAllocator framework.
8187

8188
  An IAllocator instance has several sets of attributes:
8189
    - cfg that is needed to query the cluster
8190
    - input data (all members of the _KEYS class attribute are required)
8191
    - four buffer attributes (in|out_data|text), that represent the
8192
      input (to the external script) in text and data structure format,
8193
      and the output from it, again in two formats
8194
    - the result variables from the script (success, info, nodes) for
8195
      easy usage
8196

8197
  """
8198
  _ALLO_KEYS = [
8199
    "mem_size", "disks", "disk_template",
8200
    "os", "tags", "nics", "vcpus", "hypervisor",
8201
    ]
8202
  _RELO_KEYS = [
8203
    "relocate_from",
8204
    ]
8205

    
8206
  def __init__(self, cfg, rpc, mode, name, **kwargs):
8207
    self.cfg = cfg
8208
    self.rpc = rpc
8209
    # init buffer variables
8210
    self.in_text = self.out_text = self.in_data = self.out_data = None
8211
    # init all input fields so that pylint is happy
8212
    self.mode = mode
8213
    self.name = name
8214
    self.mem_size = self.disks = self.disk_template = None
8215
    self.os = self.tags = self.nics = self.vcpus = None
8216
    self.hypervisor = None
8217
    self.relocate_from = None
8218
    # computed fields
8219
    self.required_nodes = None
8220
    # init result fields
8221
    self.success = self.info = self.nodes = None
8222
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8223
      keyset = self._ALLO_KEYS
8224
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8225
      keyset = self._RELO_KEYS
8226
    else:
8227
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8228
                                   " IAllocator" % self.mode)
8229
    for key in kwargs:
8230
      if key not in keyset:
8231
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
8232
                                     " IAllocator" % key)
8233
      setattr(self, key, kwargs[key])
8234
    for key in keyset:
8235
      if key not in kwargs:
8236
        raise errors.ProgrammerError("Missing input parameter '%s' to"
8237
                                     " IAllocator" % key)
8238
    self._BuildInputData()
8239

    
8240
  def _ComputeClusterData(self):
8241
    """Compute the generic allocator input data.
8242

8243
    This is the data that is independent of the actual operation.
8244

8245
    """
8246
    cfg = self.cfg
8247
    cluster_info = cfg.GetClusterInfo()
8248
    # cluster data
8249
    data = {
8250
      "version": constants.IALLOCATOR_VERSION,
8251
      "cluster_name": cfg.GetClusterName(),
8252
      "cluster_tags": list(cluster_info.GetTags()),
8253
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8254
      # we don't have job IDs
8255
      }
8256
    iinfo = cfg.GetAllInstancesInfo().values()
8257
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8258

    
8259
    # node data
8260
    node_results = {}
8261
    node_list = cfg.GetNodeList()
8262

    
8263
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8264
      hypervisor_name = self.hypervisor
8265
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8266
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8267

    
8268
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8269
                                        hypervisor_name)
8270
    node_iinfo = \
8271
      self.rpc.call_all_instances_info(node_list,
8272
                                       cluster_info.enabled_hypervisors)
8273
    for nname, nresult in node_data.items():
8274
      # first fill in static (config-based) values
8275
      ninfo = cfg.GetNodeInfo(nname)
8276
      pnr = {
8277
        "tags": list(ninfo.GetTags()),
8278
        "primary_ip": ninfo.primary_ip,
8279
        "secondary_ip": ninfo.secondary_ip,
8280
        "offline": ninfo.offline,
8281
        "drained": ninfo.drained,
8282
        "master_candidate": ninfo.master_candidate,
8283
        }
8284

    
8285
      if not (ninfo.offline or ninfo.drained):
8286
        nresult.Raise("Can't get data for node %s" % nname)
8287
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8288
                                nname)
8289
        remote_info = nresult.payload
8290

    
8291
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
8292
                     'vg_size', 'vg_free', 'cpu_total']:
8293
          if attr not in remote_info:
8294
            raise errors.OpExecError("Node '%s' didn't return attribute"
8295
                                     " '%s'" % (nname, attr))
8296
          if not isinstance(remote_info[attr], int):
8297
            raise errors.OpExecError("Node '%s' returned invalid value"
8298
                                     " for '%s': %s" %
8299
                                     (nname, attr, remote_info[attr]))
8300
        # compute memory used by primary instances
8301
        i_p_mem = i_p_up_mem = 0
8302
        for iinfo, beinfo in i_list:
8303
          if iinfo.primary_node == nname:
8304
            i_p_mem += beinfo[constants.BE_MEMORY]
8305
            if iinfo.name not in node_iinfo[nname].payload:
8306
              i_used_mem = 0
8307
            else:
8308
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8309
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8310
            remote_info['memory_free'] -= max(0, i_mem_diff)
8311

    
8312
            if iinfo.admin_up:
8313
              i_p_up_mem += beinfo[constants.BE_MEMORY]
8314

    
8315
        # compute memory used by instances
8316
        pnr_dyn = {
8317
          "total_memory": remote_info['memory_total'],
8318
          "reserved_memory": remote_info['memory_dom0'],
8319
          "free_memory": remote_info['memory_free'],
8320
          "total_disk": remote_info['vg_size'],
8321
          "free_disk": remote_info['vg_free'],
8322
          "total_cpus": remote_info['cpu_total'],
8323
          "i_pri_memory": i_p_mem,
8324
          "i_pri_up_memory": i_p_up_mem,
8325
          }
8326
        pnr.update(pnr_dyn)
8327

    
8328
      node_results[nname] = pnr
8329
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)
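    # in_text is the serialized document ({"nodes": ..., "instances": ...,
    # "request": ...}, among other keys) that Run() hands to the iallocator
    # script on the master node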

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
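    # callers may pass validate=False to get the raw allocator output without
    # structural checks (LUTestAllocator does this for its "out" direction)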
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict
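    # Illustrative allocator reply (values invented); the mandatory keys are
    # also exposed as attributes via the setattr() call above:
    #   {"success": true, "info": "allocation successful",
    #    "nodes": ["node2.example.com", "node3.example.com"]}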


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result