Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ e588764d

History | View | Annotate | Download (337 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
import os
30
import os.path
31
import time
32
import re
33
import platform
34
import logging
35
import copy
36
import OpenSSL
37

    
38
from ganeti import ssh
39
from ganeti import utils
40
from ganeti import errors
41
from ganeti import hypervisor
42
from ganeti import locking
43
from ganeti import constants
44
from ganeti import objects
45
from ganeti import serializer
46
from ganeti import ssconf
47

    
48

    
49
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # Hooks path/type; subclasses that run hooks must override both
  HPATH = None
  HTYPE = None
  # Opcode attributes that must be present (non-None) at construction time
  _OP_REQP = []
  # Whether this LU must hold the Big Ganeti Lock exclusively
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None  # lazily built by __GetSSH, exposed via the ssh property
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute; normalize missing/invalid values to 0
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate that every declared-required opcode attribute is present
    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    Created lazily on first access and cached for the LU's lifetime.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, ecc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    # When tasklets are defined, they take over prerequisite checking;
    # otherwise the subclass must provide its own implementation.
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    # Mirrors CheckPrereq: tasklets, when present, replace the LU-level Exec.
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-node tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    If should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    # One-shot: consume the recalculation request so stale state cannot
    # trigger a second, unintended recalculation.
    del self.recalculate_locks[locking.LEVEL_NODE]
356

    
357

    
358
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Convenience base class for Logical Units that never run hooks.

  Deriving from this class (instead of L{LogicalUnit} directly) avoids
  repeating the hook-disabling boilerplate in every hook-less LU.

  """
  # A None HPATH tells the processor not to invoke BuildHooksEnv at all
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Hook environment builder that must never be reached.

    Since HPATH is None the hooks machinery should never call this; if it
    does, something is wrong in the processor, hence the assertion.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
375

    
376

    
377
class Tasklet:
  """Base class for tasklets, the sub-components of Logical Units.

  A LU may be built entirely out of tasklets, or mix them with legacy
  code. All locking stays in the owning LU; tasklets are completely
  unaware of locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Convenience aliases taken from the owning LU
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Verify the prerequisites of this tasklet.

    May talk to other nodes, but must be idempotent: no cluster or
    system state may change. Should raise errors.OpPrereqError when a
    prerequisite is not met; the return value is ignored. Parameters
    should also be brought to canonical form here if not done earlier.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Perform the tasklet's actual work.

    Failures that are expected or handled in code should be signalled
    by raising errors.OpExecError.

    """
    raise NotImplementedError
421

    
422

    
423
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  # Guard clause: reject anything that is not a list outright
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  # An empty list here is a caller bug, not a user error
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  return utils.NiceSort([_ExpandNodeName(lu.cfg, node_name)
                         for node_name in nodes])
445

    
446

    
447
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  # Explicit names are expanded as given; no names means "all instances"
  if instances:
    return [_ExpandInstanceName(lu.cfg, name) for name in instances]
  return utils.NiceSort(lu.cfg.GetInstanceList())
469

    
470

    
471
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  # Merge both field sets into a single one for matching
  all_fields = utils.FieldSet()
  all_fields.Extend(static)
  all_fields.Extend(dynamic)

  unknown = all_fields.NonMatching(selected)
  if unknown:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(unknown), errors.ECODE_INVAL)
488

    
489

    
490
def _CheckBooleanOpField(op, name):
491
  """Validates boolean opcode parameters.
492

493
  This will ensure that an opcode parameter is either a boolean value,
494
  or None (but that it always exists).
495

496
  """
497
  val = getattr(op, name, None)
498
  if not (val is None or isinstance(val, bool)):
499
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
500
                               (name, str(val)), errors.ECODE_INVAL)
501
  setattr(op, name, val)
502

    
503

    
504
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  offending = constants.HVC_GLOBALS.intersection(params)
  if offending:
    raise errors.OpPrereqError(
      ("The following hypervisor parameters are global and cannot"
       " be customized at instance level, please modify them at"
       " cluster level: %s" % utils.CommaJoin(offending)),
      errors.ECODE_INVAL)
517

    
518

    
519
def _CheckNodeOnline(lu, node):
520
  """Ensure that a given node is online.
521

522
  @param lu: the LU on behalf of which we make the check
523
  @param node: the node to check
524
  @raise errors.OpPrereqError: if the node is offline
525

526
  """
527
  if lu.cfg.GetNodeInfo(node).offline:
528
    raise errors.OpPrereqError("Can't use offline node %s" % node,
529
                               errors.ECODE_INVAL)
530

    
531

    
532
def _CheckNodeNotDrained(lu, node):
533
  """Ensure that a given node is not drained.
534

535
  @param lu: the LU on behalf of which we make the check
536
  @param node: the node to check
537
  @raise errors.OpPrereqError: if the node is drained
538

539
  """
540
  if lu.cfg.GetNodeInfo(node).drained:
541
    raise errors.OpPrereqError("Can't use drained node %s" % node,
542
                               errors.ECODE_INVAL)
543

    
544

    
545
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  os_result.Raise("OS '%s' not in supported OS list for node %s" %
                  (os_name, node), prereq=True, ecode=errors.ECODE_INVAL)
  # Variant checking can be explicitly skipped by the caller
  if not force_variant:
    _CheckOSVariant(os_result.payload, os_name)
561

    
562

    
563
def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    raise errors.OpPrereqError(
      ("Invalid disk template name '%s', valid templates are: %s" %
       (template, utils.CommaJoin(constants.DISK_TEMPLATES))),
      errors.ECODE_INVAL)
  # File storage is a compile-time option; refuse it when it was disabled
  if template == constants.DT_FILE and not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)
574

    
575

    
576
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  # First check the configured ("should be up") state ...
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  # ... then ask the primary node whether it is actually running
  primary = instance.primary_node
  running = lu.rpc.call_instance_list([primary],
                                      [instance.hypervisor])[primary]
  running.Raise("Can't contact node %s for instance information" % primary,
                prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
590

    
591

    
592
def _ExpandItemName(fn, name, kind):
593
  """Expand an item name.
594

595
  @param fn: the function to use for expansion
596
  @param name: requested item name
597
  @param kind: text description ('Node' or 'Instance')
598
  @return: the resolved (full) name
599
  @raise errors.OpPrereqError: if the item is not found
600

601
  """
602
  full_name = fn(name)
603
  if full_name is None:
604
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
605
                               errors.ECODE_NOENT)
606
  return full_name
607

    
608

    
609
def _ExpandNodeName(cfg, name):
  """Expand a (possibly short) node name via L{_ExpandItemName}.

  """
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
612

    
613

    
614
def _ExpandInstanceName(cfg, name):
  """Expand a (possibly short) instance name via L{_ExpandItemName}.

  """
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
617

    
618

    
619
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
620
                          memory, vcpus, nics, disk_template, disks,
621
                          bep, hvp, hypervisor_name):
622
  """Builds instance related env variables for hooks
623

624
  This builds the hook environment from individual variables.
625

626
  @type name: string
627
  @param name: the name of the instance
628
  @type primary_node: string
629
  @param primary_node: the name of the instance's primary node
630
  @type secondary_nodes: list
631
  @param secondary_nodes: list of secondary nodes as strings
632
  @type os_type: string
633
  @param os_type: the name of the instance's OS
634
  @type status: boolean
635
  @param status: the should_run status of the instance
636
  @type memory: string
637
  @param memory: the memory size of the instance
638
  @type vcpus: string
639
  @param vcpus: the count of VCPUs the instance has
640
  @type nics: list
641
  @param nics: list of tuples (ip, mac, mode, link) representing
642
      the NICs the instance has
643
  @type disk_template: string
644
  @param disk_template: the disk template of the instance
645
  @type disks: list
646
  @param disks: the list of (size, mode) pairs
647
  @type bep: dict
648
  @param bep: the backend parameters for the instance
649
  @type hvp: dict
650
  @param hvp: the hypervisor parameters for the instance
651
  @type hypervisor_name: string
652
  @param hypervisor_name: the hypervisor for the instance
653
  @rtype: dict
654
  @return: the hook environment for this instance
655

656
  """
657
  if status:
658
    str_status = "up"
659
  else:
660
    str_status = "down"
661
  env = {
662
    "OP_TARGET": name,
663
    "INSTANCE_NAME": name,
664
    "INSTANCE_PRIMARY": primary_node,
665
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
666
    "INSTANCE_OS_TYPE": os_type,
667
    "INSTANCE_STATUS": str_status,
668
    "INSTANCE_MEMORY": memory,
669
    "INSTANCE_VCPUS": vcpus,
670
    "INSTANCE_DISK_TEMPLATE": disk_template,
671
    "INSTANCE_HYPERVISOR": hypervisor_name,
672
  }
673

    
674
  if nics:
675
    nic_count = len(nics)
676
    for idx, (ip, mac, mode, link) in enumerate(nics):
677
      if ip is None:
678
        ip = ""
679
      env["INSTANCE_NIC%d_IP" % idx] = ip
680
      env["INSTANCE_NIC%d_MAC" % idx] = mac
681
      env["INSTANCE_NIC%d_MODE" % idx] = mode
682
      env["INSTANCE_NIC%d_LINK" % idx] = link
683
      if mode == constants.NIC_MODE_BRIDGED:
684
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
685
  else:
686
    nic_count = 0
687

    
688
  env["INSTANCE_NIC_COUNT"] = nic_count
689

    
690
  if disks:
691
    disk_count = len(disks)
692
    for idx, (size, mode) in enumerate(disks):
693
      env["INSTANCE_DISK%d_SIZE" % idx] = size
694
      env["INSTANCE_DISK%d_MODE" % idx] = mode
695
  else:
696
    disk_count = 0
697

    
698
  env["INSTANCE_DISK_COUNT"] = disk_count
699

    
700
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
701
    for key, value in source.items():
702
      env["INSTANCE_%s_%s" % (kind, key)] = value
703

    
704
  return env
705

    
706

    
707
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  # Cluster-level defaults, used to fill in missing per-NIC parameters
  cluster_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  result = []
  for nic in nics:
    filled = objects.FillDict(cluster_nicparams, nic.nicparams)
    result.append((nic.ip, nic.mac,
                   filled[constants.NIC_MODE], filled[constants.NIC_LINK]))
  return result
729

    
730

    
731
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  be_full = cluster.FillBE(instance)
  hv_full = cluster.FillHV(instance)
  # Keyword arguments for _BuildInstanceHookEnv; the keys must match its
  # parameter names exactly
  hook_args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': be_full[constants.BE_MEMORY],
    'vcpus': be_full[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': be_full,
    'hvp': hv_full,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    hook_args.update(override)
  return _BuildInstanceHookEnv(**hook_args) # pylint: disable-msg=W0142
767

    
768

    
769
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    # Re-add the promoted nodes so their new role is picked up
    for name in promoted:
      lu.context.ReaddNode(name)
  current, maximum, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if current > maximum:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (current, maximum))
783

    
784

    
785
def _DecideSelfPromotion(lu, exceptions=None):
786
  """Decide whether I should promote myself as a master candidate.
787

788
  """
789
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
790
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
791
  # the new node will increase mc_max with one, so:
792
  mc_should = min(mc_should + 1, cp_size)
793
  return mc_now < mc_should
794

    
795

    
796
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  # Fill each NIC's parameters with the cluster defaults, then collect the
  # links of all bridged NICs
  bridges = []
  for nic in target_nics:
    params = objects.FillDict(cluster_nicparams, nic.nicparams)
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])
  if bridges:
    result = lu.rpc.call_bridges_exist(target_node, bridges)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
810

    
811

    
812
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  # Default to the instance's primary node when no node was given
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
819

    
820

    
821
def _CheckOSVariant(os_obj, name):
822
  """Check whether an OS name conforms to the os variants specification.
823

824
  @type os_obj: L{objects.OS}
825
  @param os_obj: OS object to check
826
  @type name: string
827
  @param name: OS name passed by the user, to check for validity
828

829
  """
830
  if not os_obj.supported_variants:
831
    return
832
  try:
833
    variant = name.split("+", 1)[1]
834
  except IndexError:
835
    raise errors.OpPrereqError("OS name must include a variant",
836
                               errors.ECODE_INVAL)
837

    
838
  if variant not in os_obj.supported_variants:
839
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
840

    
841

    
842
def _GetNodeInstancesInner(cfg, fn):
843
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
844

    
845

    
846
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  def _UsesNode(inst):
    # all_nodes covers the primary node plus every secondary node
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
852

    
853

    
854
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  def _IsPrimary(inst):
    return inst.primary_node == node_name

  return _GetNodeInstancesInner(cfg, _IsPrimary)
860

    
861

    
862
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
868

    
869

    
870
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the constructor arguments for a storage type.

  """
  if storage_type != constants.ST_FILE:
    # all other storage types need no extra arguments
    return []
  # special case for file storage: storage.FileStorage wants a list of
  # storage directories
  return [[cfg.GetFileStorageDir()]]
880

    
881

    
882
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of the instance's disks that are faulty on a node.

  @param cfg: the cluster configuration (used to set the disk IDs)
  @param rpc: the RPC runner
  @param instance: the instance whose disks are checked
  @param node_name: the node on which to query the mirror status
  @param prereq: whether a failure should be reported as a prerequisite error

  """
  # the per-node disk IDs must be set before querying the node
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  return [idx for idx, bdev_status in enumerate(result.payload)
          if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY]
897

    
898

    
899
def _FormatTimestamp(secs):
900
  """Formats a Unix timestamp with the local timezone.
901

902
  """
903
  return time.strftime("%F %T %Z", time.gmtime(secs))
904

    
905

    
906
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    The post hooks run only on the master node.

    """
    master = self.cfg.GetMasterNode()
    return ({"OP_TARGET": self.cfg.GetClusterName()}, [], [master])

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
933

    
934

    
935
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    No node lists: at this point only the master node is left.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    # the only remaining node must be the master itself
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      # len - 1: the master itself is not counted in the message
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    The order matters: hooks must run while the master role is still
    active, and the SSH keys are backed up only after the master has
    been stopped.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      # best-effort: hook failures must not abort cluster destruction
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      # keep a copy of the cluster SSH keys before they are discarded
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master
995

    
996

    
997
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  @param filename: path of the certificate (used in messages only)
  @param expired: whether the certificate has already expired
  @param not_before: start of the validity period, or None if unknown
  @param not_after: end of the validity period, or None if unknown
  @param now: the current time, as a Unix timestamp
  @param warn_days: remaining validity below which a warning is issued
  @param error_days: remaining validity below which an error is issued
  @return: a (error-type, message) tuple, or (None, None) if all is well

  """
  if expired:
    # report as much of the validity window as we actually know
    if not_before is not None and not_after is not None:
      validity = (" (valid from %s to %s)" %
                  (_FormatTimestamp(not_before),
                   _FormatTimestamp(not_after)))
    elif not_before is not None:
      validity = " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      validity = " (valid until %s)" % _FormatTimestamp(not_after)
    else:
      validity = ""
    return (LUVerifyCluster.ETYPE_ERROR,
            "Certificate %s is expired%s" % (filename, validity))

  if not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  if not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))
    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)
    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)
1034

    
1035

    
1036
def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file
  @return: a (error-type, message) tuple as produced by
      L{_VerifyCertificateInner}, or an error tuple if the file could
      not be loaded at all

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    # any load error (I/O, bad PEM, ...) is reported, not propagated
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())
1055

    
1056

    
1057
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  # item types for error codes
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error codes, as (item-type, code) pairs; the duplicated
  # EINSTANCEMISSINGDISK definition has been removed
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # keyword and values for the error type passed to _Error/_ErrorIf
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
1096

    
1097
  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      # configuration-derived state
      self.offline = offline
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.ghost = False
      # runtime state, filled in from the node's RPC answers
      self.volumes = {}
      self.instances = []
      self.mfree = 0
      self.dfree = 0
      # per-subsystem RPC failure flags
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
1134

    
1135
  def ExpandNames(self):
    """Acquire shared locks on all nodes and all instances.

    Verification only reads the configuration, so shared locks suffice.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1141

    
1142
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # complete the message with the positional arguments, if any
    if args:
      msg = msg % args
    # format the whole message, either machine- or human-readable
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item_part = " " + item
      else:
        item_part = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item_part, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)
1167

    
1168
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    Simulated errors (debug_simulate_errors) also trigger reporting.

    """
    triggered = bool(cond) or self.op.debug_simulate_errors
    if triggered:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    etype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if etype == self.ETYPE_ERROR:
      self.bad = self.bad or triggered
1178

    
1179
  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version; the remote version must be a
    # (protocol, release) pair
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    # a protocol mismatch makes any further result unreliable
    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version; a mismatch is only a warning
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # per-hypervisor verification results (non-None means failure)
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # node setup check: a non-empty list of problems means failure
    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
1248

    
1249
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    # the node's clock is allowed to drift up to NODE_MAX_CLOCK_SKEW on
    # either side of the [start, end] window of the RPC call
    ntime_diff = None
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
1280

    
1281
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data (volume group and physical volumes).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    # nothing to verify on non-LVM clusters
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
1317

    
1318
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity (ssh and tcp reachability).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # ssh connectivity: the node reports a dict of {peer: error message}
    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    # tcp connectivity: same structure as the ssh results
    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])
1348

    
1349
  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    @param instance: the instance name
    @param instanceconfig: the instance configuration object
    @param node_image: dict mapping node names to L{NodeImage} objects

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    # the instance must not appear as running on any other node
    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)
1384

    
1385
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      # volumes present on the node but not expected there are orphans
      expected = node_vol_should.get(node, [])
      for volume in n_img.volumes:
        self._ErrorIf(volume not in expected, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1401

    
1402
  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        self._ErrorIf(o_inst not in instancelist,
                      self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)
1413

    
1414
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    @param node_image: dict mapping node names to L{NodeImage} objects
    @param instance_cfg: dict mapping instance names to their config objects

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          # only auto-balanced instances take part in N+1 checks
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        # message fixed: it previously read "not enough memory on to
        # accommodate"
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)
1440

    
1441
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      # master-only files are mandatory only on master candidates
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)
1484

    
1485
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies and the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors this node should have, as
    # {minor: (instance name, should-be-active)}
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    # expected minors must be in use if the instance should be up
    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    # and any in-use minor must be an expected one
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
1531

    
1532
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # assume failure until we have parsed a valid LV list
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      # no volume group configured, nothing to parse (lvm_fail stays set)
      return
    if isinstance(lvdata, basestring):
      # a string payload is an error message from the node
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
1560

    
1561
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    bad_data = not isinstance(idata, list)
    self._ErrorIf(bad_data, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if bad_data:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
1582

    
1583
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    bad_hv = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(bad_hv, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not bad_hv:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      bad_vg = (constants.NV_VGLIST not in nresult or
                vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(bad_vg, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not bad_vg:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
1620

    
1621
  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    # every requested skip must be one of the optional checks
    if self.skip_set.difference(constants.VERIFY_OPTIONAL_CHECKS):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)
1632

    
1633
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {"CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())}
    # export the tags of each node individually
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return (env, [], all_nodes)
1648

    
1649
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    @param feedback_fn: function used to report progress back to the caller
    @rtype: boolean
    @return: True if no problem was detected, False otherwise

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    # fixed typo in the progress message ("oprhan" -> "orphan")
    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # Override lu_result manually here, as _ErrorIf only sets self.bad.
          # Exec returns "not self.bad", i.e. falsy means failure, so the
          # override must be falsy as well (this previously set 1, which
          # would have reported success despite the communication failure).
          lu_result = 0
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    vg_name = self.cfg.GetVGName()
    node_names = utils.NiceSort(self.cfg.GetNodeList())
    all_instances = [self.cfg.GetInstanceInfo(iname)
                     for iname in self.cfg.GetInstanceList()]

    # build a map of (node, volume) -> owning instance for every LV that
    # should exist on some node
    owners = {}
    for instance in all_instances:
      if not instance.admin_up:
        continue
      if instance.disk_template not in constants.DTS_NET_MIRROR:
        continue
      lvs_by_node = {}
      instance.MapLVsByNode(lvs_by_node)
      for node, volumes in lvs_by_node.iteritems():
        for volume in volumes:
          owners[(node, volume)] = instance

    if not owners:
      # no volume is expected anywhere, nothing to verify
      return result

    node_lvs = self.rpc.call_lv_list(node_names, vg_name)

    for node in node_names:
      node_res = node_lvs[node]
      if node_res.offline:
        # offline nodes are skipped entirely
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      # every reported LV is removed from the expectation map; offline LVs
      # mark their instance as needing activate-disks
      for lv_name, (_, _, lv_online) in node_res.payload.items():
        instance = owners.pop((node, lv_name), None)
        if (instance is not None and not lv_online
            and instance.name not in res_instances):
          res_instances.append(instance.name)

    # whatever is left in the map was never reported by any node, i.e.
    # those LVs are missing; group them per instance
    for key, instance in owners.iteritems():
      res_missing.setdefault(instance.name, []).append(key)

    return result
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    # "instances" must be a (possibly empty) list of instance names
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      # explicit list given: expand each name and lock only those
      # instances (node locks are computed later via DeclareLocks)
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      # no list given: operate on all instances, lock everything
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    # all locks are taken in shared mode
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    # only the primary nodes are needed, sizes are read from there
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # "all instances" case: use whatever instance locks we acquired
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: True if this disk (or a descendant data child) was resized

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one
        per disk whose recorded size was corrected

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # group the disks by primary node, so one RPC per node suffices
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # work on copies so SetDiskID doesn't touch the config objects
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      # per-node failures are only warnings; repair what we can
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # NOTE(review): other LUs in this file read RPC results through
      # .payload; confirm that .data is the right accessor here.
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # divide by 2^20: presumably bytes -> mebibytes, matching the
        # unit of disk.size -- TODO confirm
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master = self.cfg.GetMasterNode()
    # pre hooks run only on the master, post hooks on all nodes
    return env, [master], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()

    # renaming to the current name/IP is a no-op and thus rejected
    if (new_name, new_ip) == (old_name, old_ip):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # a new master IP must not be live on the network already
    if new_ip != old_ip and utils.TcpPing(new_ip,
                                          constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    new_name = self.op.name
    new_ip = self.ip

    # take down the master IP while the configuration is being changed
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      # always try to bring the master IP back up
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
def _RecursiveCheckIfLVMBased(disk):
2207
  """Check if the given disk or its children are lvm-based.
2208

2209
  @type disk: L{objects.Disk}
2210
  @param disk: the disk to check
2211
  @rtype: boolean
2212
  @return: boolean indicating whether a LD_LV dev_type was found or not
2213

2214
  """
2215
  if disk.children:
2216
    for chdisk in disk.children:
2217
      if _RecursiveCheckIfLVMBased(chdisk):
2218
        return True
2219
  return disk.dev_type == constants.LD_LV
2220

    
2221

    
2222
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    # a missing candidate_pool_size attribute means "no change"
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)
    _CheckBooleanOpField(self.op, "maintain_node_health")

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    # hooks (pre and post) run on the master node only
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    # vg_name == "" (set but empty) means "disable lvm storage"; that is
    # only allowed while no lvm-based instance exists
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      # new defaults = current cluster defaults overlaid with the request
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          # deep-copy so filling doesn't modify the stored nicparams
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        # NOTE(review): unlike every other raise in this method, no
        # errors.ECODE_* argument is passed here; confirm whether
        # errors.ECODE_INVAL should be supplied.
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          # merge per-hypervisor dicts for already-known OSes
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      # an empty string means "disable lvm", stored as None
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    # the self.new_* values were computed/validated in CheckPrereq
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    self.cfg.Update(self.cluster, feedback_fn)
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # Step 1: compute the list of target nodes: all online nodes plus the
  # requested extra ones, minus the master itself
  master = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  target_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    target_nodes.extend(additional_nodes)
  if master.name in target_nodes:
    target_nodes.remove(master.name)

  # Step 2: collect the ancillary file names, including the per-hypervisor
  # files for every enabled hypervisor
  files = set([
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.RAPI_CERT_FILE,
    constants.RAPI_USERS_FILE,
    constants.CONFD_HMAC_KEY,
    ])
  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Step 3: upload every file that exists locally, logging (but not
  # failing on) per-node copy errors
  for fname in files:
    if not os.path.exists(fname):
      continue
    upload_result = lu.rpc.call_upload_file(target_nodes, fname)
    for node_name, node_result in upload_result.items():
      err = node_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (fname, node_name, err))
2487

    
2488

    
2489
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    # Grab all node locks in shared mode: we only read the configuration
    # and push files out, nothing is modified per-node.
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # Calling Update on the (unmodified) cluster object forces a push of
    # the config/ssconf files; the remaining ancillary files are pushed
    # explicitly afterwards.
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
2515

    
2516

    
2517
def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  @param lu: the calling logical unit (used for RPC, config access and
      user feedback)
  @param instance: the instance whose disks are polled on its primary node
  @param oneshot: if True, report the current status once instead of
      waiting until synchronisation is complete
  @return: True if the disks ended up non-degraded, False otherwise

  """
  # nothing to wait for without disks
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  # retries counts consecutive failed RPCs; degr_retries limits the extra
  # confirmation loops done when the disks look done-but-degraded
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      # RPC-level failure: retry up to 10 times with a 6 second pause
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, instance.disks[i].iv_name)
        continue

      # degraded with no sync in progress counts towards the overall
      # degraded status
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        # at least one device is still syncing
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # sleep until the next poll, at most 60 seconds
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
2588

    
2589

    
2590
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  healthy = True

  if on_primary or dev.AssembleOnSecondary():
    find_result = lu.rpc.call_blockdev_find(node, dev)
    err = find_result.fail_msg
    if err:
      lu.LogWarning("Can't find disk on node %s: %s", node, err)
      healthy = False
    elif not find_result.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      healthy = False
    elif ldisk:
      healthy = (healthy and
                 find_result.payload.ldisk_status == constants.LDS_OKAY)
    else:
      healthy = healthy and not find_result.payload.is_degraded

  # NOTE: children are checked recursively without the ldisk flag, i.e.
  # always against the overall is_degraded status
  if dev.children:
    for child in dev.children:
      healthy = healthy and _CheckDiskConsistency(lu, child, node, on_primary)

  return healthy
2622

    
2623

    
2624
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    # (single pass over items instead of a key lookup per node)
    good_nodes = [node_name for (node_name, nres) in rlist.items()
                  if not nres.fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    # no pointless copy of the list returned by the config
    valid_nodes = self.cfg.GetOnlineNodeList()
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      # initialize unconditionally so the field loop below can never see
      # unbound names (the "valid"/"variants" fields imply calc_valid, but
      # being explicit is safer)
      valid = True
      variants = None
      if calc_valid:
        for osl in os_data.values():
          # an OS is valid only if it is valid (status True) on every node
          valid = valid and osl and osl[0][1]
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              # keep only variants supported on all nodes
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2737

    
2738

    
2739
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    # exclude the node being removed from the hook targets
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    # same node list for both the pre and the post phase
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    # refuse removal while any instance uses the node (primary or secondary)
    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      # deliberately broad: hook failures must not abort the removal
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    # best-effort cleanup on the node itself; failures are only warned about
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)
2825

    
2826

    
2827
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields that map 1:1 to node object attributes
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields that require a live RPC query to the node
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      # FIX: use one dict per node; dict.fromkeys(nodenames, {}) would
      # alias a single shared dict as the value for every key, which is a
      # latent trap should any caller ever mutate an entry
      live_data = dict((name, {}) for name in nodenames)

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output
2986

    
2987

    
2988
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    # no explicit node list means all nodes
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    node_names = self.nodes
    volumes = self.rpc.call_node_volumes(node_names)

    # map each instance to its per-node logical volumes so the "instance"
    # field can be resolved below
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in instances])

    output = []
    for node in node_names:
      nresult = volumes[node]
      if nresult.offline:
        continue
      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s", node, err)
        continue

      node_vols = sorted(nresult.payload, key=lambda vol: vol['dev'])

      for vol in node_vols:
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # default to '-' and overwrite if an owning instance is found
            val = '-'
            for inst in instances:
              if (node in lv_by_node[inst] and
                  vol['name'] in lv_by_node[inst][node]):
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
3072

    
3073

    
3074
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    # optional name filter, default to "no filter"
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # The name field is needed for sorting, so request it from the nodes
    # even if the caller did not ask for it
    if constants.SF_NAME in self.op.output_fields:
      fields = list(self.op.output_fields)
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Node and type are synthesized locally; never request them remotely
    fields = [fld for fld in fields
              if fld not in (constants.SF_NODE, constants.SF_TYPE)]

    field_idx = dict((fld, idx) for (idx, fld) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't get storage data from node %s: %s", node, err)
        continue

      # index the returned rows by unit name so they can be emitted sorted
      rows = dict((row[name_idx], row) for row in nresult.payload)

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(val)

        result.append(out)

    return result
3169

    
3170

    
3171
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    # FIX: store the expanded node name back into the opcode; the original
    # code assigned it to "self.opnode_name" (a typo), so ExpandNames
    # locked and Exec contacted the raw, unexpanded node name
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type
    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # only the target node needs to be locked
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the requested storage type supports modification and
    that only modifiable fields are being changed.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the target node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
3221

    
3222

    
3223
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  # NOTE(review): CheckPrereq/Exec also read self.op.readd and an optional
  # self.op.secondary_ip, which are not listed here -- presumably filled in
  # with defaults by the opcode layer; verify against the OpAddNode
  # definition.
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    # NODE_PIP/NODE_SIP rely on CheckPrereq having stored the resolved
    # primary/secondary IPs into self.op
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    # resolve the name via DNS; the resolved name/IP become authoritative
    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    # secondary_ip is optional; default to the primary (single-homed node)
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    # membership check: adding requires absence, re-adding requires presence
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      # on re-add, the node itself must keep its previous IP configuration
      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      # any other node must not already use either of the new node's IPs
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    # when re-adding, the node itself must not count against the
    # master-candidate statistics
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    # for re-adds reuse the existing config entry; otherwise build a fresh one
    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      # host DSA/RSA key pairs plus the cluster user's key pair, read
      # locally and pushed to the new node via the node_add RPC below
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    # a dual-homed node must actually own the secondary IP it declared
    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # run a node-verify pass from the master against the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # normalize the three state flags plus auto_promote to True/False/None
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    # at least one flag must be given, and at most one may be set to True
    # (offline/drained/master_candidate are mutually exclusive states)
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    # demotion from master candidate can happen explicitly or as a side
    # effect of offlining/draining
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    # auto-promotion of another node may touch any node, so lock them all
    self.lock_all = self.op.auto_promote and self.might_demote

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the requested flag changes against the node's current
    state: the master node's flags cannot be changed, demotion must not
    drop the cluster below the required number of master candidates, and
    an offline/drained node cannot be promoted without clearing that
    state in the same operation.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    # promotion is only valid if the offline/drained state is (or becomes)
    # cleared in this same operation
    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    Applies the flag changes in order (offline, master_candidate,
    drained), auto-demoting/clearing conflicting flags, then updates the
    configuration and, if the master-candidate status changed, re-adds
    the node to the context so job-queue propagation is adjusted.

    """
    node = self.node

    # list of (flag, reason/value) pairs, returned to the caller
    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        # offline implies neither master-candidate nor drained
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        # best-effort: ask the node itself to drop master-candidate files
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        # drained implies neither master-candidate nor offline
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # powercycling the master is only allowed with the force flag
    is_master = self.op.node_name == self.cfg.GetMasterNode()
    if is_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort option and must not block on other
    jobs; therefore no locks are acquired at all.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Schedules the reboot of the node.

    """
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name,
                                               self.cfg.GetHypervisorType())
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    # read-only query, no locks needed
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()

    # keep only the per-OS hypervisor parameters of enabled hypervisors
    enabled = frozenset(cluster.enabled_hypervisors)
    os_hvp = {}
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = dict((hv_name, hv_params)
                             for hv_name, hv_params in hv_dict.items()
                             if hv_name in enabled)

    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in cluster.enabled_hypervisors)

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    # read-only query, no locks needed
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    # map each known field to a zero-argument callable producing its value
    getters = {
      "cluster_name": self.cfg.GetClusterName,
      "master_node": self.cfg.GetMasterNode,
      "drain_flag": lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      }
    values = []
    for field in self.op.output_fields:
      if field not in getters:
        raise errors.ParameterError(field)
      values.append(getters[field]())
    return values
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)
    # opcodes without the optional ignore_size slot default it to False
    self.op.ignore_size = getattr(self.op, "ignore_size", False)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a size-less copy so the recorded (possibly wrong) size
        # is not enforced during activation
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        # the RPC payload is the device path on the primary node
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles the instance's disks; on failure, shuts them back down and
  raises an error (hinting at --force when applicable).

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if disks_ok:
    return
  _ShutdownInstanceDisks(lu, instance)
  if force is not None and not force:
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    self.instance = instance

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    _SafeShutdownInstanceDisks(self, self.instance)
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  Unlike L{_ShutdownInstanceDisks}, this first verifies that the
  instance is not running (via L{_CheckInstanceDown}) and only then
  shuts the disks down.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance)
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3941
  """Shutdown block devices of an instance.
3942

3943
  This does the shutdown on all nodes of the instance.
3944

3945
  If the ignore_primary is false, errors on the primary node are
3946
  ignored.
3947

3948
  """
3949
  all_result = True
3950
  for disk in instance.disks:
3951
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3952
      lu.cfg.SetDiskID(top_disk, node)
3953
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3954
      msg = result.fail_msg
3955
      if msg:
3956
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3957
                      disk.iv_name, node, msg)
3958
        if not ignore_primary or node != instance.primary_node:
3959
          all_result = False
3960
  return all_result
3961

    
3962

    
3963
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  Queries the node for its free memory under the given hypervisor and
  raises an OpPrereqError if the node has less than the requested
  amount, or if the information cannot be obtained.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  node_result = nodeinfo[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = node_result.payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Verify that several nodes have enough free space in the default VG.

  Every node in C{nodenames} is queried over RPC for the free space in
  its default volume group; an L{errors.OpPrereqError} is raised for the
  first node whose value cannot be obtained, is not an integer, or is
  smaller than C{requested}.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  # a single RPC call covers all the nodes at once
  vg_name = lu.cfg.GetVGName()
  hv_name = lu.cfg.GetHypervisorType()
  nodeinfo = lu.rpc.call_node_info(nodenames, vg_name, hv_name)
  for node in nodenames:
    ninfo = nodeinfo[node]
    ninfo.Raise("Cannot get current information from node %s" % node,
                prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = ninfo.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      # missing/malformed answer: environment error, not lack of space
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if vg_free < requested:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)
4034

    
4035

    
4036
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  Optional one-off C{beparams}/C{hvparams} overrides may be carried by
  the opcode; they are validated in L{CheckPrereq} and forwarded to the
  node in L{Exec}.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  # parameters the opcode must provide
  _OP_REQP = ["instance_name", "force"]
  # this LU declares its own locks instead of taking the Big Ganeti Lock
  REQ_BGL = False

  def ExpandNames(self):
    # canonicalise the instance name and acquire the instance lock
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    # hooks run on the master node plus all nodes of the instance
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates any
    one-off beparams/hvparams overrides, and verifies the primary node
    has enough free memory if the instance is not already running.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      # layer cluster defaults, the instance's own hvparams and finally
      # the one-off overrides from this opcode, then syntax-check the
      # combined result
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      # memory is only needed when the instance actually has to be started
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    # record the desired "up" administrative state in the configuration
    # before actually starting the instance
    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      # the start failed: deactivate the disks activated above before
      # reporting the error
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
4136

    
4137

    
4138
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  Soft and hard reboots are delegated to the hypervisor on the node; a
  full reboot is implemented as a complete shutdown (including disks)
  followed by a fresh start.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  # parameters the opcode must provide
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    # fall back to the cluster-wide default when the opcode does not
    # carry an explicit shutdown timeout
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    # validate the reboot type before taking any locks
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboot: performed by the hypervisor on the node; make
      # sure the disk IDs are set for the current primary first
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot: shut the instance and its disks down completely,
      # then restart from scratch
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        # restart failed: deactivate the disks again before bailing out
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    # the instance is (again) running: record that in the configuration
    self.cfg.MarkInstanceUp(instance.name)
4226

    
4227

    
4228
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  Stops the instance on its primary node (a failure there is only a
  warning) and then deactivates its disks.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    # use the cluster default when the opcode carries no timeout
    timeout = getattr(self.op, "timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)
    self.timeout = timeout

  def ExpandNames(self):
    # canonicalise the instance name and acquire the instance lock
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    node_list = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    self.instance = instance
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    # record the desired "down" state before actually stopping it
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.timeout)
    shutdown_msg = result.fail_msg
    if shutdown_msg:
      # a failed shutdown is not fatal; we still try to release the disks
      self.proc.LogWarning("Could not shutdown instance: %s" % shutdown_msg)

    _ShutdownInstanceDisks(self, instance)
4283

    
4284

    
4285
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  Optionally switches the instance to a new OS (C{os_type} opcode
  attribute) and re-runs the OS creation scripts on its disks.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  # parameters the opcode must provide
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # canonicalise the instance name and acquire the instance lock
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    # reinstalling makes no sense without disks to install onto
    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      # persist the OS change before running the create scripts
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    # the disks must be active while the OS scripts run
    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # always deactivate the disks again, even if the install failed
      _ShutdownInstanceDisks(self, inst)
4354

    
4355

    
4356
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  The C{disks} opcode attribute selects which disk indices to recreate;
  an empty list means all of them.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    # the disks parameter must be a list of non-negative integer indices
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if not isinstance(item, int) or item < 0:
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    # canonicalise the instance name and acquire the instance lock
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    node_list = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if self.op.disks:
      # validate the requested indices against the actual disk count
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      # no indices given: recreate every disk
      self.op.disks = range(len(instance.disks))

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    # skip every disk index that was not requested
    to_skip = [idx for idx in range(len(self.instance.disks))
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
4427

    
4428

    
4429
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  # NOTE(review): unlike the neighbouring LUs this class sets no
  # REQ_BGL = False and defines no ExpandNames, and the lock swap in
  # Exec relies on "we hold the BGL" -- presumably it runs under the
  # Big Ganeti Lock; confirm against the LogicalUnit base class
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  # parameters the opcode must provide
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    and that the new name resolves and is not already taken.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    # unless told otherwise, make sure the new name's IP is not in use
    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    # remember the old file storage directory before the config rename,
    # since the disk logical IDs will change with the new name
    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      # move the on-disk file storage directory to match the new name
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    # the OS rename script needs the disks to be active
    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        # a failed rename script is only a warning: the configuration
        # rename has already happened and is kept
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)
4518

    
4519

    
4520
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  Shuts the instance down, removes its block devices and finally drops
  it from the cluster configuration; with C{ignore_failures} set, the
  first two steps degrade to warnings.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  # parameters the opcode must provide
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    # fall back to the cluster-wide default shutdown timeout
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # node locks are computed later, once the instance's nodes are known
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    # pre-hooks run only on the master; post-hooks also on the (by then
    # removed) instance's nodes
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    # drop the instance from the configuration and schedule the removal
    # of its lock
    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4598

    
4599

    
4600
class LUQueryInstances(NoHooksLU):
4601
  """Logical unit for querying instances.
4602

4603
  """
4604
  # pylint: disable-msg=W0142
4605
  _OP_REQP = ["output_fields", "names", "use_locking"]
4606
  REQ_BGL = False
4607
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4608
                    "serial_no", "ctime", "mtime", "uuid"]
4609
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4610
                                    "admin_state",
4611
                                    "disk_template", "ip", "mac", "bridge",
4612
                                    "nic_mode", "nic_link",
4613
                                    "sda_size", "sdb_size", "vcpus", "tags",
4614
                                    "network_port", "beparams",
4615
                                    r"(disk)\.(size)/([0-9]+)",
4616
                                    r"(disk)\.(sizes)", "disk_usage",
4617
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4618
                                    r"(nic)\.(bridge)/([0-9]+)",
4619
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4620
                                    r"(disk|nic)\.(count)",
4621
                                    "hvparams",
4622
                                    ] + _SIMPLE_FIELDS +
4623
                                  ["hv/%s" % name
4624
                                   for name in constants.HVS_PARAMETERS
4625
                                   if name not in constants.HVC_GLOBALS] +
4626
                                  ["be/%s" % name
4627
                                   for name in constants.BES_PARAMETERS])
4628
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4629

    
4630

    
4631
  def ExpandNames(self):
4632
    _CheckOutputFields(static=self._FIELDS_STATIC,
4633
                       dynamic=self._FIELDS_DYNAMIC,
4634
                       selected=self.op.output_fields)
4635

    
4636
    self.needed_locks = {}
4637
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4638
    self.share_locks[locking.LEVEL_NODE] = 1
4639

    
4640
    if self.op.names:
4641
      self.wanted = _GetWantedInstances(self, self.op.names)
4642
    else:
4643
      self.wanted = locking.ALL_SET
4644

    
4645
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4646
    self.do_locking = self.do_node_query and self.op.use_locking
4647
    if self.do_locking:
4648
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4649
      self.needed_locks[locking.LEVEL_NODE] = []
4650
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4651

    
4652
  def DeclareLocks(self, level):
4653
    if level == locking.LEVEL_NODE and self.do_locking:
4654
      self._LockInstancesNodes()
4655

    
4656
  def CheckPrereq(self):
4657
    """Check prerequisites.
4658

4659
    """
4660
    pass
4661

    
4662
  def Exec(self, feedback_fn):
4663
    """Computes the list of nodes and their attributes.
4664

4665
    """
4666
    # pylint: disable-msg=R0912
4667
    # way too many branches here
4668
    all_info = self.cfg.GetAllInstancesInfo()
4669
    if self.wanted == locking.ALL_SET:
4670
      # caller didn't specify instance names, so ordering is not important
4671
      if self.do_locking:
4672
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4673
      else:
4674
        instance_names = all_info.keys()
4675
      instance_names = utils.NiceSort(instance_names)
4676
    else:
4677
      # caller did specify names, so we must keep the ordering
4678
      if self.do_locking:
4679
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4680
      else:
4681
        tgt_set = all_info.keys()
4682
      missing = set(self.wanted).difference(tgt_set)
4683
      if missing:
4684
        raise errors.OpExecError("Some instances were removed before"
4685
                                 " retrieving their data: %s" % missing)
4686
      instance_names = self.wanted
4687

    
4688
    instance_list = [all_info[iname] for iname in instance_names]
4689

    
4690
    # begin data gathering
4691

    
4692
    nodes = frozenset([inst.primary_node for inst in instance_list])
4693
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4694

    
4695
    bad_nodes = []
4696
    off_nodes = []
4697
    if self.do_node_query:
4698
      live_data = {}
4699
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4700
      for name in nodes:
4701
        result = node_data[name]
4702
        if result.offline:
4703
          # offline nodes will be in both lists
4704
          off_nodes.append(name)
4705
        if result.fail_msg:
4706
          bad_nodes.append(name)
4707
        else:
4708
          if result.payload:
4709
            live_data.update(result.payload)
4710
          # else no instance is alive
4711
    else:
4712
      live_data = dict([(name, {}) for name in instance_names])
4713

    
4714
    # end data gathering
4715

    
4716
    HVPREFIX = "hv/"
4717
    BEPREFIX = "be/"
4718
    output = []
4719
    cluster = self.cfg.GetClusterInfo()
4720
    for instance in instance_list:
4721
      iout = []
4722
      i_hv = cluster.FillHV(instance, skip_globals=True)
4723
      i_be = cluster.FillBE(instance)
4724
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4725
                                 nic.nicparams) for nic in instance.nics]
4726
      for field in self.op.output_fields:
4727
        st_match = self._FIELDS_STATIC.Matches(field)
4728
        if field in self._SIMPLE_FIELDS:
4729
          val = getattr(instance, field)
4730
        elif field == "pnode":
4731
          val = instance.primary_node
4732
        elif field == "snodes":
4733
          val = list(instance.secondary_nodes)
4734
        elif field == "admin_state":
4735
          val = instance.admin_up
4736
        elif field == "oper_state":
4737
          if instance.primary_node in bad_nodes:
4738
            val = None
4739
          else:
4740
            val = bool(live_data.get(instance.name))
4741
        elif field == "status":
4742
          if instance.primary_node in off_nodes:
4743
            val = "ERROR_nodeoffline"
4744
          elif instance.primary_node in bad_nodes:
4745
            val = "ERROR_nodedown"
4746
          else:
4747
            running = bool(live_data.get(instance.name))
4748
            if running:
4749
              if instance.admin_up:
4750
                val = "running"
4751
              else:
4752
                val = "ERROR_up"
4753
            else:
4754
              if instance.admin_up:
4755
                val = "ERROR_down"
4756
              else:
4757
                val = "ADMIN_down"
4758
        elif field == "oper_ram":
4759
          if instance.primary_node in bad_nodes:
4760
            val = None
4761
          elif instance.name in live_data:
4762
            val = live_data[instance.name].get("memory", "?")
4763
          else:
4764
            val = "-"
4765
        elif field == "vcpus":
4766
          val = i_be[constants.BE_VCPUS]
4767
        elif field == "disk_template":
4768
          val = instance.disk_template
4769
        elif field == "ip":
4770
          if instance.nics:
4771
            val = instance.nics[0].ip
4772
          else:
4773
            val = None
4774
        elif field == "nic_mode":
4775
          if instance.nics:
4776
            val = i_nicp[0][constants.NIC_MODE]
4777
          else:
4778
            val = None
4779
        elif field == "nic_link":
4780
          if instance.nics:
4781
            val = i_nicp[0][constants.NIC_LINK]
4782
          else:
4783
            val = None
4784
        elif field == "bridge":
4785
          if (instance.nics and
4786
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4787
            val = i_nicp[0][constants.NIC_LINK]
4788
          else:
4789
            val = None
4790
        elif field == "mac":
4791
          if instance.nics:
4792
            val = instance.nics[0].mac
4793
          else:
4794
            val = None
4795
        elif field == "sda_size" or field == "sdb_size":
4796
          idx = ord(field[2]) - ord('a')
4797
          try:
4798
            val = instance.FindDisk(idx).size
4799
          except errors.OpPrereqError:
4800
            val = None
4801
        elif field == "disk_usage": # total disk usage per node
4802
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4803
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4804
        elif field == "tags":
4805
          val = list(instance.GetTags())
4806
        elif field == "hvparams":
4807
          val = i_hv
4808
        elif (field.startswith(HVPREFIX) and
4809
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4810
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4811
          val = i_hv.get(field[len(HVPREFIX):], None)
4812
        elif field == "beparams":
4813
          val = i_be
4814
        elif (field.startswith(BEPREFIX) and
4815
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4816
          val = i_be.get(field[len(BEPREFIX):], None)
4817
        elif st_match and st_match.groups():
4818
          # matches a variable list
4819
          st_groups = st_match.groups()
4820
          if st_groups and st_groups[0] == "disk":
4821
            if st_groups[1] == "count":
4822
              val = len(instance.disks)
4823
            elif st_groups[1] == "sizes":
4824
              val = [disk.size for disk in instance.disks]
4825
            elif st_groups[1] == "size":
4826
              try:
4827
                val = instance.FindDisk(st_groups[2]).size
4828
              except errors.OpPrereqError:
4829
                val = None
4830
            else:
4831
              assert False, "Unhandled disk parameter"
4832
          elif st_groups[0] == "nic":
4833
            if st_groups[1] == "count":
4834
              val = len(instance.nics)
4835
            elif st_groups[1] == "macs":
4836
              val = [nic.mac for nic in instance.nics]
4837
            elif st_groups[1] == "ips":
4838
              val = [nic.ip for nic in instance.nics]
4839
            elif st_groups[1] == "modes":
4840
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4841
            elif st_groups[1] == "links":
4842
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4843
            elif st_groups[1] == "bridges":
4844
              val = []
4845
              for nicp in i_nicp:
4846
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4847
                  val.append(nicp[constants.NIC_LINK])
4848
                else:
4849
                  val.append(None)
4850
            else:
4851
              # index-based item
4852
              nic_idx = int(st_groups[2])
4853
              if nic_idx >= len(instance.nics):
4854
                val = None
4855
              else:
4856
                if st_groups[1] == "mac":
4857
                  val = instance.nics[nic_idx].mac
4858
                elif st_groups[1] == "ip":
4859
                  val = instance.nics[nic_idx].ip
4860
                elif st_groups[1] == "mode":
4861
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4862
                elif st_groups[1] == "link":
4863
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4864
                elif st_groups[1] == "bridge":
4865
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4866
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4867
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4868
                  else:
4869
                    val = None
4870
                else:
4871
                  assert False, "Unhandled NIC parameter"
4872
          else:
4873
            assert False, ("Declared but unhandled variable parameter '%s'" %
4874
                           field)
4875
        else:
4876
          assert False, "Declared but unhandled parameter '%s'" % field
4877
        iout.append(val)
4878
      output.append(iout)
4879

    
4880
    return output
4881

    
4882

    
4883
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  Shuts the instance down on its current primary node and brings it up
  on its (network-mirrored) secondary.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    # optional opcode parameter; fall back to the cluster-wide default
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))
    nl = [self.cfg.GetMasterNode()] + list(inst.secondary_nodes)
    # post-hooks also run on the old primary
    nl_post = nl + [old_primary]
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = inst.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    tgt_node = secondaries[0]
    _CheckNodeOnline(self, tgt_node)
    _CheckNodeNotDrained(self, tgt_node)
    if inst.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, tgt_node, "failing over instance %s" %
                           inst.name, be_params[constants.BE_MEMORY],
                           inst.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, inst, node=tgt_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    inst = self.instance

    src_node = inst.primary_node
    tgt_node = inst.secondary_nodes[0]

    if inst.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in inst.disks:
        # for drbd, these are drbd over lvm
        if (not _CheckDiskConsistency(self, dev, tgt_node, False) and
            not self.op.ignore_consistency):
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, src_node)

    result = self.rpc.call_instance_shutdown(src_node, inst,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, src_node, msg))
      # best-effort mode: warn and continue with the failover
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, src_node, src_node, msg)

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, inst, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    inst.primary_node = tgt_node
    # distribute new instance config to the other nodes
    self.cfg.Update(inst, feedback_fn)

    # Only start the instance if it's marked as up
    if inst.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   inst.name, tgt_node)

      disks_ok, _ = _AssembleInstanceDisks(self, inst,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(tgt_node, inst, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (inst.name, tgt_node, msg))
5035

    
5036

    
5037
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # the actual work is delegated to a tasklet, shared with LUMigrateNode
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, inst)
    env.update({
      "MIGRATE_LIVE": self.op.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      })
    nl = [self.cfg.GetMasterNode()] + list(inst.secondary_nodes)
    # post-hooks also run on the old primary
    nl_post = nl + [old_primary]
    return env, nl, nl_post
5086

    
5087

    
5088
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
    # FIX: "ignore_consistency" is not in _OP_REQP and is not guaranteed
    # to exist on this opcode; Exec previously read
    # self.op.ignore_consistency directly, raising AttributeError when the
    # source-node shutdown failed. Default to False (the safe behaviour).
    self.ignore_consistency = getattr(self.op, "ignore_consistency", False)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only plain (LVM) and file-based disks can be copied via blockdev_export
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # self.ignore_consistency is safely defaulted in CheckArguments
      if self.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
5268

    
5269

    
5270
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # one migration tasklet per instance having this node as its primary
    instance_names = []
    tasklets = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      instance_names.append(inst.name)
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }
    nl = [self.cfg.GetMasterNode()]
    return (env, nl, nl)
5320

    
5321

    
5322
class TLMigrateInstance(Tasklet):
5323
  def __init__(self, lu, instance_name, live, cleanup):
5324
    """Initializes this class.
5325

5326
    """
5327
    Tasklet.__init__(self, lu)
5328

    
5329
    # Parameters
5330
    self.instance_name = instance_name
5331
    self.live = live
5332
    self.cleanup = cleanup
5333

    
5334
  def CheckPrereq(self):
5335
    """Check prerequisites.
5336

5337
    This checks that the instance is in the cluster.
5338

5339
    """
5340
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5341
    instance = self.cfg.GetInstanceInfo(instance_name)
5342
    assert instance is not None
5343

    
5344
    if instance.disk_template != constants.DT_DRBD8:
5345
      raise errors.OpPrereqError("Instance's disk layout is not"
5346
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5347

    
5348
    secondary_nodes = instance.secondary_nodes
5349
    if not secondary_nodes:
5350
      raise errors.ConfigurationError("No secondary node but using"
5351
                                      " drbd8 disk template")
5352

    
5353
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5354

    
5355
    target_node = secondary_nodes[0]
5356
    # check memory requirements on the secondary node
5357
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5358
                         instance.name, i_be[constants.BE_MEMORY],
5359
                         instance.hypervisor)
5360

    
5361
    # check bridge existance
5362
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5363

    
5364
    if not self.cleanup:
5365
      _CheckNodeNotDrained(self, target_node)
5366
      result = self.rpc.call_instance_migratable(instance.primary_node,
5367
                                                 instance)
5368
      result.Raise("Can't migrate, please use failover",
5369
                   prereq=True, ecode=errors.ECODE_STATE)
5370

    
5371
    self.instance = instance
5372

    
5373
  def _WaitUntilSync(self):
5374
    """Poll with custom rpc for disk sync.
5375

5376
    This uses our own step-based rpc call.
5377

5378
    """
5379
    self.feedback_fn("* wait until resync is done")
5380
    all_done = False
5381
    while not all_done:
5382
      all_done = True
5383
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5384
                                            self.nodes_ip,
5385
                                            self.instance.disks)
5386
      min_percent = 100
5387
      for node, nres in result.items():
5388
        nres.Raise("Cannot resync disks on node %s" % node)
5389
        node_done, node_percent = nres.payload
5390
        all_done = all_done and node_done
5391
        if node_percent is not None:
5392
          min_percent = min(min_percent, node_percent)
5393
      if not all_done:
5394
        if min_percent < 100:
5395
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5396
        time.sleep(2)
5397

    
5398
  def _EnsureSecondary(self, node):
5399
    """Demote a node to secondary.
5400

5401
    """
5402
    self.feedback_fn("* switching node %s to secondary mode" % node)
5403

    
5404
    for dev in self.instance.disks:
5405
      self.cfg.SetDiskID(dev, node)
5406

    
5407
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5408
                                          self.instance.disks)
5409
    result.Raise("Cannot change disk to secondary on node %s" % node)
5410

    
5411
  def _GoStandalone(self):
5412
    """Disconnect from the network.
5413

5414
    """
5415
    self.feedback_fn("* changing into standalone mode")
5416
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5417
                                               self.instance.disks)
5418
    for node, nres in result.items():
5419
      nres.Raise("Cannot disconnect disks node %s" % node)
5420

    
5421
  def _GoReconnect(self, multimaster):
5422
    """Reconnect to the network.
5423

5424
    """
5425
    if multimaster:
5426
      msg = "dual-master"
5427
    else:
5428
      msg = "single-master"
5429
    self.feedback_fn("* changing disks into %s mode" % msg)
5430
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5431
                                           self.instance.disks,
5432
                                           self.instance.name, multimaster)
5433
    for node, nres in result.items():
5434
      nres.Raise("Cannot change disks config on node %s" % node)
5435

    
5436
  def _ExecCleanup(self):
5437
    """Try to cleanup after a failed migration.
5438

5439
    The cleanup is done by:
5440
      - check that the instance is running only on one node
5441
        (and update the config if needed)
5442
      - change disks on its secondary node to secondary
5443
      - wait until disks are fully synchronized
5444
      - disconnect from the network
5445
      - change disks into single-master mode
5446
      - wait again until disks are fully synchronized
5447

5448
    """
5449
    instance = self.instance
5450
    target_node = self.target_node
5451
    source_node = self.source_node
5452

    
5453
    # check running on only one node
5454
    self.feedback_fn("* checking where the instance actually runs"
5455
                     " (if this hangs, the hypervisor might be in"
5456
                     " a bad state)")
5457
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5458
    for node, result in ins_l.items():
5459
      result.Raise("Can't contact node %s" % node)
5460

    
5461
    runningon_source = instance.name in ins_l[source_node].payload
5462
    runningon_target = instance.name in ins_l[target_node].payload
5463

    
5464
    if runningon_source and runningon_target:
5465
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5466
                               " or the hypervisor is confused. You will have"
5467
                               " to ensure manually that it runs only on one"
5468
                               " and restart this operation.")
5469

    
5470
    if not (runningon_source or runningon_target):
5471
      raise errors.OpExecError("Instance does not seem to be running at all."
5472
                               " In this case, it's safer to repair by"
5473
                               " running 'gnt-instance stop' to ensure disk"
5474
                               " shutdown, and then restarting it.")
5475

    
5476
    if runningon_target:
5477
      # the migration has actually succeeded, we need to update the config
5478
      self.feedback_fn("* instance running on secondary node (%s),"
5479
                       " updating config" % target_node)
5480
      instance.primary_node = target_node
5481
      self.cfg.Update(instance, self.feedback_fn)
5482
      demoted_node = source_node
5483
    else:
5484
      self.feedback_fn("* instance confirmed to be running on its"
5485
                       " primary node (%s)" % source_node)
5486
      demoted_node = target_node
5487

    
5488
    self._EnsureSecondary(demoted_node)
5489
    try:
5490
      self._WaitUntilSync()
5491
    except errors.OpExecError:
5492
      # we ignore here errors, since if the device is standalone, it
5493
      # won't be able to sync
5494
      pass
5495
    self._GoStandalone()
5496
    self._GoReconnect(False)
5497
    self._WaitUntilSync()
5498

    
5499
    self.feedback_fn("* done")
5500

    
5501
  def _RevertDiskStatus(self):
5502
    """Try to revert the disk status after a failed migration.
5503

5504
    """
5505
    target_node = self.target_node
5506
    try:
5507
      self._EnsureSecondary(target_node)
5508
      self._GoStandalone()
5509
      self._GoReconnect(False)
5510
      self._WaitUntilSync()
5511
    except errors.OpExecError, err:
5512
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5513
                         " drives: error '%s'\n"
5514
                         "Please look and recover the instance status" %
5515
                         str(err))
5516

    
5517
  def _AbortMigration(self):
5518
    """Call the hypervisor code to abort a started migration.
5519

5520
    """
5521
    instance = self.instance
5522
    target_node = self.target_node
5523
    migration_info = self.migration_info
5524

    
5525
    abort_result = self.rpc.call_finalize_migration(target_node,
5526
                                                    instance,
5527
                                                    migration_info,
5528
                                                    False)
5529
    abort_msg = abort_result.fail_msg
5530
    if abort_msg:
5531
      logging.error("Aborting migration failed on target node %s: %s",
5532
                    target_node, abort_msg)
5533
      # Don't raise an exception here, as we stil have to try to revert the
5534
      # disk status, even if this step failed.
5535

    
5536
  def _ExecMigration(self):
5537
    """Migrate an instance.
5538

5539
    The migrate is done by:
5540
      - change the disks into dual-master mode
5541
      - wait until disks are fully synchronized again
5542
      - migrate the instance
5543
      - change disks on the new secondary node (the old primary) to secondary
5544
      - wait until disks are fully synchronized
5545
      - change disks into single-master mode
5546

5547
    """
5548
    instance = self.instance
5549
    target_node = self.target_node
5550
    source_node = self.source_node
5551

    
5552
    self.feedback_fn("* checking disk consistency between source and target")
5553
    for dev in instance.disks:
5554
      if not _CheckDiskConsistency(self, dev, target_node, False):
5555
        raise errors.OpExecError("Disk %s is degraded or not fully"
5556
                                 " synchronized on target node,"
5557
                                 " aborting migrate." % dev.iv_name)
5558

    
5559
    # First get the migration information from the remote node
5560
    result = self.rpc.call_migration_info(source_node, instance)
5561
    msg = result.fail_msg
5562
    if msg:
5563
      log_err = ("Failed fetching source migration information from %s: %s" %
5564
                 (source_node, msg))
5565
      logging.error(log_err)
5566
      raise errors.OpExecError(log_err)
5567

    
5568
    self.migration_info = migration_info = result.payload
5569

    
5570
    # Then switch the disks to master/master mode
5571
    self._EnsureSecondary(target_node)
5572
    self._GoStandalone()
5573
    self._GoReconnect(True)
5574
    self._WaitUntilSync()
5575

    
5576
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5577
    result = self.rpc.call_accept_instance(target_node,
5578
                                           instance,
5579
                                           migration_info,
5580
                                           self.nodes_ip[target_node])
5581

    
5582
    msg = result.fail_msg
5583
    if msg:
5584
      logging.error("Instance pre-migration failed, trying to revert"
5585
                    " disk status: %s", msg)
5586
      self.feedback_fn("Pre-migration failed, aborting")
5587
      self._AbortMigration()
5588
      self._RevertDiskStatus()
5589
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5590
                               (instance.name, msg))
5591

    
5592
    self.feedback_fn("* migrating instance to %s" % target_node)
5593
    time.sleep(10)
5594
    result = self.rpc.call_instance_migrate(source_node, instance,
5595
                                            self.nodes_ip[target_node],
5596
                                            self.live)
5597
    msg = result.fail_msg
5598
    if msg:
5599
      logging.error("Instance migration failed, trying to revert"
5600
                    " disk status: %s", msg)
5601
      self.feedback_fn("Migration failed, aborting")
5602
      self._AbortMigration()
5603
      self._RevertDiskStatus()
5604
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5605
                               (instance.name, msg))
5606
    time.sleep(10)
5607

    
5608
    instance.primary_node = target_node
5609
    # distribute new instance config to the other nodes
5610
    self.cfg.Update(instance, self.feedback_fn)
5611

    
5612
    result = self.rpc.call_finalize_migration(target_node,
5613
                                              instance,
5614
                                              migration_info,
5615
                                              True)
5616
    msg = result.fail_msg
5617
    if msg:
5618
      logging.error("Instance migration succeeded, but finalization failed:"
5619
                    " %s", msg)
5620
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5621
                               msg)
5622

    
5623
    self._EnsureSecondary(source_node)
5624
    self._WaitUntilSync()
5625
    self._GoStandalone()
5626
    self._GoReconnect(False)
5627
    self._WaitUntilSync()
5628

    
5629
    self.feedback_fn("* done")
5630

    
5631
  def Exec(self, feedback_fn):
5632
    """Perform the migration.
5633

5634
    """
5635
    feedback_fn("Migrating instance %s" % self.instance.name)
5636

    
5637
    self.feedback_fn = feedback_fn
5638

    
5639
    self.source_node = self.instance.primary_node
5640
    self.target_node = self.instance.secondary_nodes[0]
5641
    self.all_nodes = [self.source_node, self.target_node]
5642
    self.nodes_ip = {
5643
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5644
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5645
      }
5646

    
5647
    if self.cleanup:
5648
      return self._ExecCleanup()
5649
    else:
5650
      return self._ExecMigration()
5651

    
5652

    
5653
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be change to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  # devices that must exist on secondaries are always created
  force_create = force_create or device.CreateOnSecondary()

  # recurse first, so children exist before the parent is assembled
  for child in device.children or []:
    _CreateBlockDev(lu, node, instance, child, force_create,
                    info, force_open)

  if force_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5693

    
5694

    
5695
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5696
  """Create a single block device on a given node.
5697

5698
  This will not recurse over children of the device, so they must be
5699
  created in advance.
5700

5701
  @param lu: the lu on whose behalf we execute
5702
  @param node: the node on which to create the device
5703
  @type instance: L{objects.Instance}
5704
  @param instance: the instance which owns the device
5705
  @type device: L{objects.Disk}
5706
  @param device: the device to create
5707
  @param info: the extra 'metadata' we should attach to the device
5708
      (this will be represented as a LVM tag)
5709
  @type force_open: boolean
5710
  @param force_open: this parameter will be passes to the
5711
      L{backend.BlockdevCreate} function where it specifies
5712
      whether we run on primary or not, and it affects both
5713
      the child assembly and the device own Open() execution
5714

5715
  """
5716
  lu.cfg.SetDiskID(device, node)
5717
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5718
                                       instance.name, force_open, info)
5719
  result.Raise("Can't create block device %s on"
5720
               " node %s for instance %s" % (device, node, instance.name))
5721
  if device.physical_id is None:
5722
    device.physical_id = result.payload
5723

    
5724

    
5725
def _GenerateUniqueNames(lu, exts):
5726
  """Generate a suitable LV name.
5727

5728
  This will generate a logical volume name for the given instance.
5729

5730
  """
5731
  results = []
5732
  for val in exts:
5733
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5734
    results.append("%s%s" % (new_id, val))
5735
  return results
5736

    
5737

    
5738
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  # NOTE: the order of these reservations is kept as-is, since they
  # mutate the cluster configuration
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # data volume plus the fixed-size (128 MiB) DRBD metadata volume
  data_child = objects.Disk(dev_type=constants.LD_LV, size=size,
                            logical_id=(vgname, names[0]))
  meta_child = objects.Disk(dev_type=constants.LD_LV, size=128,
                            logical_id=(vgname, names[1]))
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=(primary, secondary, port,
                                  p_minor, s_minor,
                                  shared_secret),
                      children=[data_child, meta_child],
                      iv_name=iv_name)
5757

    
5758

    
5759
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template (a constants.DT_* value)
  @param instance_name: name of the owning instance (used when
      reserving DRBD minors)
  @param primary_node: the instance's primary node
  @param secondary_nodes: list of secondary nodes; must be empty for
      plain/file templates and exactly one node for drbd8
  @param disk_info: list of dicts, each with "size" and "mode" keys
  @param file_storage_dir: directory under which file-based disks live
  @param file_driver: the driver for file-based disks
  @param base_index: offset added to each disk index (non-zero when
      extending an existing disk set)
  @return: list of L{objects.Disk} objects, one per entry in disk_info
  @raise errors.ProgrammerError: on a template/secondary-node mismatch
      or an unknown template name

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    # one LV name per disk
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the secondary,
    # interleaved as [p0, s0, p1, s1, ...]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # each DRBD disk needs a data LV and a metadata LV; names are
    # likewise interleaved as [d0_data, d0_meta, d1_data, ...]
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
5823

    
5824

    
5825
def _GetInstanceInfoText(instance):
5826
  """Compute that text that should be added to the disk's metadata.
5827

5828
  """
5829
  return "originstname+%s" % instance.name
5830

    
5831

    
5832
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode, all_nodes = instance.primary_node, instance.all_nodes
  else:
    # an explicit target node restricts creation to that single node
    pnode, all_nodes = target_node, [target_node]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # only the (effective) primary node forces creation/opening
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
5875

    
5876

    
5877
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only act on the override node, for the top-level device
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # FIX: report the node we actually contacted (tgt); the old code
      # always named instance.primary_node, which is wrong when a
      # target_node override is in effect
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5924

    
5925

    
5926
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  try:
    return req_size_dict[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
5944

    
5945

    
5946
def _CheckHVParams(lu, nodenames, hvname, hvparams):
5947
  """Hypervisor parameter validation.
5948

5949
  This function abstract the hypervisor parameter validation to be
5950
  used in both instance create and instance modify.
5951

5952
  @type lu: L{LogicalUnit}
5953
  @param lu: the logical unit for which we check
5954
  @type nodenames: list
5955
  @param nodenames: the list of nodes on which we should check
5956
  @type hvname: string
5957
  @param hvname: the name of the hypervisor we should use
5958
  @type hvparams: dict
5959
  @param hvparams: the parameters which we need to check
5960
  @raise errors.OpPrereqError: if the parameters are not valid
5961

5962
  """
5963
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5964
                                                  hvname,
5965
                                                  hvparams)
5966
  for node in nodenames:
5967
    info = hvinfo[node]
5968
    if info.offline:
5969
      continue
5970
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5971

    
5972

    
5973
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # opcode attributes that must always be present; the optional ones
  # are defaulted in CheckArguments
  _OP_REQP = ["instance_name", "disks",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False
5984

    
5985
  def CheckArguments(self):
    """Check arguments.

    Normalizes the opcode (defaulting the optional attributes) and runs
    all validations that need no cluster state: creation mode, disk
    adoption constraints, instance name/ip checks, file-storage options
    and the node-vs-iallocator choice.

    @raise errors.OpPrereqError: if any static check fails

    """
    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
                 "disk_template", "identify_defaults"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if not hasattr(self.op, "no_install"):
      self.op.no_install = False
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)
    # check disk information: either all adopt, or no adopt
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption only works for plain disks, without iallocator and
      # outside of instance import
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)
                                   errors.ECODE_INVAL)
6082

    
6083
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # with an allocator any node may become the target, so lock all
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      # explicit node(s): expand the short names and lock just those
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        # default the export path to the instance name
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # unknown source node: we will have to search all nodes
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          # relative paths are anchored under the cluster export dir
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6132

    
6133
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Fills in self.op.pnode (and self.op.snode when two nodes are
    required) from the iallocator's answer.

    @raise errors.OpPrereqError: if the allocator fails or returns an
        unexpected number of nodes

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      # second returned node becomes the secondary
      self.op.snode = ial.result[1]
6169

    
6170
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {"ADD_MODE": self.op.mode}
    if self.op.mode == constants.INSTANCE_IMPORT:
      # import-specific details for the hooks
      env.update({
        "SRC_NODE": self.op.src_node,
        "SRC_PATH": self.op.src_path,
        "SRC_IMAGES": self.src_images,
        })

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    # hooks run on the master plus all nodes of the new instance
    node_list = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return env, node_list, node_list
6203

    
6204
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      # no source node given: search the export on all locked nodes
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          # skip nodes whose export list could not be retrieved
          continue
        if src_path in exp_list[node].payload:
          found = True
          # remember the node and the absolute path on the opcode
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    # only exports matching our own format version can be imported
    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info
6250

    
6251
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    @param einfo: the parsed export information, as returned by
        L{_ReadExportInfo}

    """
    # the OS always comes from the export
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      # rebuild the nic definitions from the per-index export options
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6315

    
6316
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Drops every hv/be/nic parameter whose value merely repeats the
    cluster default, so the instance keeps only real overrides.

    """
    def _DropDefaults(params, defaults):
      # remove entries equal to the corresponding default value
      for key in list(params):
        if key in defaults and defaults[key] == params[key]:
          del params[key]

    # hvparams
    _DropDefaults(self.op.hvparams,
                  cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type))
    # beparams
    _DropDefaults(self.op.beparams,
                  cluster.beparams.get(constants.PP_DEFAULT, {}))
    # nic params (only the known nic parameter names are considered)
    nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
6336

    
6337
  def CheckPrereq(self):
    """Check prerequisites.

    Validates the instance-creation opcode end to end before Exec runs:
    reads the export data (import mode only), checks the disk template
    against the cluster's VG, fills/validates hypervisor and backend
    parameters, builds the NIC and disk descriptions, runs the
    iallocator if requested and finally verifies the chosen nodes
    (online/drained state, free disk, free memory, OS, bridges).

    """
    # In import mode, load the export metadata first; other sections below
    # (disk count, MACs) read from it.
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    # LVM-backed templates need a volume group configured on the cluster.
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    # Default to the cluster-wide hypervisor when none was requested.
    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
                                                        self.op.os_type),
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup: translate the opcode's NIC dicts into objects.NIC
    # instances, validating mode/ip/mac/bridge along the way.
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        # 'auto' means "use the instance's resolved address", which only
        # exists when the name check ran in ExpandNames.
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        # Reserve user-specified MACs so concurrent jobs cannot take them.
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification: 'bridge' is the legacy spelling of 'link' and
      # the two are mutually exclusive.
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      # Validate against cluster defaults, but store only the explicitly
      # requested parameters on the NIC object.
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      # Collect the dump file path for each exported disk; False marks
      # disks that have no dump in the export.
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      # When re-importing under the same name, reuse the exported MACs for
      # NICs the user left on 'auto'.
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          # NOTE(review): 'exp_nic_count >= idx' also admits
          # idx == exp_nic_count, for which 'nic%d_mac' should not exist in
          # the export (indices run 0..count-1) -- looks like an off-by-one
          # ('exp_nic_count > idx'); confirm against the export file format.
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      # Reserve the named LVs so concurrent jobs cannot adopt them too.
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      # The LVs must exist on the primary node and must not be online.
      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
6618

    
6619
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    Allocates a network port when the hypervisor requires one, generates
    the disk set (or renames adopted LVs into place), registers the
    instance in the configuration, releases node locks that are no
    longer needed, waits for disk sync, runs the OS create/import
    scripts and finally starts the instance when requested.

    @param feedback_fn: callable used to report progress to the job log
    @return: the list of all nodes of the new instance

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                      string_file_storage_dir, instance)

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # The instance starts with admin_up=False; it is flipped below only
    # when self.op.start is set.
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        # best-effort cleanup of partially-created disks, then re-raise
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # keep the source node locked: the import RPC below still needs it
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # degraded disks: undo the creation and fail the job
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        # FIXME: pass debug option from opcode to backend
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name,
                                                         self.op.debug_level)
        msg = import_result.fail_msg
        if msg:
          # import failures are only warned about, not fatal
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
6764

    
6765

    
6766
class LUConnectConsole(NoHooksLU):
  """Return the command needed to attach to an instance's console.

  Unlike most LUs this one performs no change: its result is the ssh
  command line that, executed on the master node, opens the instance
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Verify that the instance exists and its primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    self.instance = instance
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Build and return the console ssh command line.

    """
    instance = self.instance
    primary = instance.primary_node

    # Ask the primary node which instances it currently runs.
    running = self.rpc.call_instance_list([primary],
                                          [instance.hypervisor])[primary]
    running.Raise("Can't get node information from %s" % primary)

    if instance.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, primary)

    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hv_params = cluster.FillHV(instance)
    be_params = cluster.FillBE(instance)
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    console_cmd = hyper.GetShellCommandForConsole(instance, hv_params,
                                                  be_params)

    # Wrap the hypervisor console command in an ssh call to the primary.
    return self.ssh.BuildCmd(primary, "root", console_cmd, batch=True,
                             tty=True)
6817

    
6818

    
6819
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The actual work is delegated to a L{TLReplaceDisks} tasklet; this LU
  only handles argument defaults, locking and the hook environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    # Backfill optional opcode attributes, then let the tasklet validate
    # the mode/remote_node/iallocator combination.
    self.op.remote_node = getattr(self.op, "remote_node", None)
    self.op.iallocator = getattr(self.op, "iallocator", None)
    self.op.early_release = getattr(self.op, "early_release", False)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      # The allocator may pick any node, so everything must be locked.
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    elif self.op.remote_node is not None:
      new_secondary = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [new_secondary]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)
    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # Unless every node is already locked, the instance's own
    # primary/secondary nodes have to be added to the node lock set.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    run_nodes = [self.cfg.GetMasterNode(), instance.primary_node]
    if self.op.remote_node is not None:
      run_nodes.append(self.op.remote_node)
    return env, run_nodes, run_nodes
6893

    
6894

    
6895
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances away from a node.

  Builds one L{TLReplaceDisks} tasklet (secondary-change mode) per
  instance that has a secondary on the evacuated node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    # Backfill optional opcode attributes; evacuation always changes the
    # secondary, hence validation with REPLACE_DISK_CHG.
    self.op.remote_node = getattr(self.op, "remote_node", None)
    self.op.iallocator = getattr(self.op, "iallocator", None)
    self.op.early_release = getattr(self.op, "early_release", False)

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # One disk-replacement tasklet per instance that keeps a secondary on
    # the evacuated node.
    names = []
    tasklets = []
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)
      tasklets.append(TLReplaceDisks(self, inst.name,
                                     constants.REPLACE_DISK_CHG,
                                     self.op.iallocator, self.op.remote_node,
                                     [], True, self.op.early_release))

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # Unless every node is already locked, add the affected instances'
    # primary/secondary nodes to the node lock set.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    run_nodes = [self.cfg.GetMasterNode()]
    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      run_nodes.append(self.op.remote_node)

    return (env, run_nodes, run_nodes)
6982

    
6983

    
6984
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters, stored as given
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data, filled in by CheckPrereq/_CheckPrereq2
    for attr in ("instance", "new_node", "target_node", "other_node",
                 "remote_node_info", "node_secondary_ip"):
      setattr(self, attr, None)
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # Validate the mode/remote_node/iallocator combination
    have_node = remote_node is not None
    have_iallocator = iallocator is not None

    if mode == constants.REPLACE_DISK_CHG:
      if not (have_node or have_iallocator):
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if have_node and have_iallocator:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif have_node or have_iallocator:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    allocator = IAllocator(lu.cfg, lu.rpc,
                           mode=constants.IALLOCATOR_MODE_RELOC,
                           name=instance_name,
                           relocate_from=relocate_from)
    allocator.Run(iallocator_name)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, allocator.info),
                                 errors.ECODE_NORES)

    if len(allocator.result) != allocator.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(allocator.result),
                                  allocator.required_nodes),
                                 errors.ECODE_FAULT)

    new_secondary = allocator.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, new_secondary)

    return new_secondary
  def _FindFaultyDisks(self, node_name):
    """Convenience wrapper over L{_FindFaultyInstanceDisks}.

    """
    faulty = _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                      node_name, True)
    return faulty
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst = self.cfg.GetInstanceInfo(self.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if inst.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    sec_count = len(inst.secondary_nodes)
    if sec_count != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 sec_count,
                                 errors.ECODE_FAULT)

    # The second half may be deferred until Exec (node evacuation case)
    if not self.delay_iallocator:
      self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # Determine the new secondary: either given directly or via the allocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    # An explicit disk list is only valid for primary/secondary replacement
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty, so there is nothing to repair
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Gather the secondary IPs of all nodes involved in the operation
    secondary_ips = {}
    for node_name in (self.target_node, self.other_node, self.new_node):
      if node_name is not None:
        secondary_ips[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = secondary_ips
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      # Deferred second half of the prerequisite checks
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    # If the instance is down, its disks must be activated first (and
    # deactivated again afterwards)
    activate_disks = (not self.instance.admin_up)
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    # Changing the secondary requires the full node-replace procedure
    if self.new_node is not None:
      handler = self._ExecDrbd8Secondary
    else:
      handler = self._ExecDrbd8DiskOnly

    try:
      return handler(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
  def _CheckVolumeGroup(self, nodes):
    """Verify that the cluster volume group exists on all given nodes.

    """
    self.lu.LogInfo("Checking volume groups")

    vg_name = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      node_result = results[node]
      node_result.Raise("Error checking node %s" % node)
      if vg_name not in node_result.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vg_name, node))
  def _CheckDisksExistence(self, nodes):
    """Verify that each disk selected for replacement exists on the nodes.

    """
    for idx, disk in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(disk, node)

        result = self.rpc.call_blockdev_find(node, disk)

        if result.fail_msg or not result.payload:
          err = result.fail_msg or "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, err))
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Verify that the selected disks are consistent on the given node.

    """
    for idx, disk in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      consistent = _CheckDiskConsistency(self.lu, disk, node_name, on_primary,
                                         ldisk=ldisk)
      if not consistent:
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    """Create new (data+meta) LVs on the given node for each selected disk.

    Returns a dict mapping each disk's iv_name to a
    (disk, old_lvs, new_lvs) tuple.

    """
    vg_name = self.cfg.GetVGName()
    iv_names = {}

    for idx, disk in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(disk, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                             logical_id=(vg_name, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_name, names[1]))

      new_lvs = [lv_data, lv_meta]
      iv_names[disk.iv_name] = (disk, disk.children, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
  def _CheckDevices(self, node_name, iv_names):
    """Verify each DRBD device is present and not degraded on node_name.

    """
    for iv_name, (disk, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(disk, node_name)

      result = self.rpc.call_blockdev_find(node_name, disk)

      if result.fail_msg or not result.payload:
        err = result.fail_msg or "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (iv_name, err))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % iv_name)
  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the replaced logical volumes on the given node.

    Failures to remove are only warned about (with a hint to clean up
    manually), never fatal.

    """
    for iv_name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % iv_name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
  def _ReleaseNodeLock(self, node_name):
    """Releases the node-level lock(s) for the given node name(s).

    """
    # callers also pass a list of names; glm.release accepts both forms
    glm = self.lu.context.glm
    glm.release(locking.LEVEL_NODE, node_name)
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        # Rollback: try to remove the new LVs again, then fail
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            # FIX: the two string literals used to concatenate to
            # "logicalvolumes" (missing space)
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    # FIX: the comprehension variable was "dev", shadowing the loop above
    # without being used; "_" makes the intent explicit
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for _ in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        # FIX: the two string literals used to concatenate to "oldnode"
        # (missing space)
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
7654
  """Repairs the volume group on a node.
7655

7656
  """
7657
  _OP_REQP = ["node_name"]
7658
  REQ_BGL = False
7659

    
7660
  def CheckArguments(self):
7661
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7662

    
7663
  def ExpandNames(self):
7664
    self.needed_locks = {
7665
      locking.LEVEL_NODE: [self.op.node_name],
7666
      }
7667

    
7668
  def _CheckFaultyDisks(self, instance, node_name):
7669
    """Ensure faulty disks abort the opcode or at least warn."""
7670
    try:
7671
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7672
                                  node_name, True):
7673
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7674
                                   " node '%s'" % (instance.name, node_name),
7675
                                   errors.ECODE_STATE)
7676
    except errors.OpPrereqError, err:
7677
      if self.op.ignore_consistency:
7678
        self.proc.LogWarning(str(err.args[0]))
7679
      else:
7680
        raise
7681

    
7682
  def CheckPrereq(self):
7683
    """Check prerequisites.
7684

7685
    """
7686
    storage_type = self.op.storage_type
7687

    
7688
    if (constants.SO_FIX_CONSISTENCY not in
7689
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7690
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7691
                                 " repaired" % storage_type,
7692
                                 errors.ECODE_INVAL)
7693

    
7694
    # Check whether any instance on this node has faulty disks
7695
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7696
      if not inst.admin_up:
7697
        continue
7698
      check_nodes = set(inst.all_nodes)
7699
      check_nodes.discard(self.op.node_name)
7700
      for inst_node_name in check_nodes:
7701
        self._CheckFaultyDisks(inst, inst_node_name)
7702

    
7703
  def Exec(self, feedback_fn):
7704
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7705
                (self.op.name, self.op.node_name))
7706

    
7707
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7708
    result = self.rpc.call_storage_execute(self.op.node_name,
7709
                                           self.op.storage_type, st_args,
7710
                                           self.op.name,
7711
                                           constants.SO_FIX_CONSISTENCY)
7712
    result.Raise("Failed to repair storage unit '%s' on %s" %
7713
                 (self.op.name, self.op.node_name))
7714

    
7715

    
7716
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  For each secondary instance on the given nodes, compute the node it
  should be moved to: either the explicitly given new secondary node or
  one chosen by an iallocator.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments: at most one of remote_node/iallocator is given."""
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the node names and declare the needed locks."""
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      # the iallocator may pick any node, so lock them all
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    """Compute and return the evacuation strategy.

    @return: list of [instance_name, target_node] pairs

    """
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        # the new secondary must not already be the instance's primary
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        # FIX: OpExecError takes no error-code argument (only OpPrereqError
        # does); the stray errors.ECODE_NORES used to end up embedded in the
        # exception's args and garble the reported message
        raise errors.OpExecError("No valid evacuation solution: %s" %
                                 ial.info)
      result = ial.result
    return result
7768

    
7769

    
7770
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and prepare node-lock recalculation."""
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock all nodes of the instance at the node level."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    all_nodes = list(inst.all_nodes)
    for node_name in all_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if inst.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, all_nodes, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    inst = self.instance
    grown_disk = self.disk
    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(grown_disk, node_name)
      result = self.rpc.call_blockdev_grow(node_name, grown_disk,
                                           self.op.amount)
      result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    grown_disk.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)
    if self.op.wait_for_sync:
      if not _WaitForSync(self, inst):
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
7854

    
7855

    
7856
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and set up shared locks."""
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      # explicit list: expand every name and lock only those instances
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in self.op.instances]
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      # no names given: query all instances
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the nodes of the wanted instances at the node level."""
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                             for name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    # static queries and missing nodes have no runtime status
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)
    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None
    result.Raise("Can't compute disk status for %s" % instance_name)

    dev_status = result.payload
    if dev_status is None:
      return None

    return (dev_status.dev_path, dev_status.major, dev_status.minor,
            dev_status.sync_percent, dev_status.estimated_time,
            dev_status.is_degraded, dev_status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                          instance.name, dev)
    sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    # recurse into child devices, if any
    children = [self._ComputeDiskStatus(instance, snode, child)
                for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": pstatus,
      "sstatus": sstatus,
      "children": children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if self.op.static:
        remote_state = None
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        payload = remote_info.payload
        if payload and "state" in payload:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
8011

    
8012

    
8013
class LUSetInstanceParams(LogicalUnit):
8014
  """Modifies an instances's parameters.
8015

8016
  """
8017
  HPATH = "instance-modify"
8018
  HTYPE = constants.HTYPE_INSTANCE
8019
  _OP_REQP = ["instance_name"]
8020
  REQ_BGL = False
8021

    
8022
  def CheckArguments(self):
8023
    if not hasattr(self.op, 'nics'):
8024
      self.op.nics = []
8025
    if not hasattr(self.op, 'disks'):
8026
      self.op.disks = []
8027
    if not hasattr(self.op, 'beparams'):
8028
      self.op.beparams = {}
8029
    if not hasattr(self.op, 'hvparams'):
8030
      self.op.hvparams = {}
8031
    if not hasattr(self.op, "disk_template"):
8032
      self.op.disk_template = None
8033
    if not hasattr(self.op, "remote_node"):
8034
      self.op.remote_node = None
8035
    if not hasattr(self.op, "os_name"):
8036
      self.op.os_name = None
8037
    if not hasattr(self.op, "force_variant"):
8038
      self.op.force_variant = False
8039
    self.op.force = getattr(self.op, "force", False)
8040
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8041
            self.op.hvparams or self.op.beparams or self.op.os_name):
8042
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8043

    
8044
    if self.op.hvparams:
8045
      _CheckGlobalHvParams(self.op.hvparams)
8046

    
8047
    # Disk validation
8048
    disk_addremove = 0
8049
    for disk_op, disk_dict in self.op.disks:
8050
      if disk_op == constants.DDM_REMOVE:
8051
        disk_addremove += 1
8052
        continue
8053
      elif disk_op == constants.DDM_ADD:
8054
        disk_addremove += 1
8055
      else:
8056
        if not isinstance(disk_op, int):
8057
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8058
        if not isinstance(disk_dict, dict):
8059
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8060
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8061

    
8062
      if disk_op == constants.DDM_ADD:
8063
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8064
        if mode not in constants.DISK_ACCESS_SET:
8065
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8066
                                     errors.ECODE_INVAL)
8067
        size = disk_dict.get('size', None)
8068
        if size is None:
8069
          raise errors.OpPrereqError("Required disk parameter size missing",
8070
                                     errors.ECODE_INVAL)
8071
        try:
8072
          size = int(size)
8073
        except (TypeError, ValueError), err:
8074
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8075
                                     str(err), errors.ECODE_INVAL)
8076
        disk_dict['size'] = size
8077
      else:
8078
        # modification of disk
8079
        if 'size' in disk_dict:
8080
          raise errors.OpPrereqError("Disk size change not possible, use"
8081
                                     " grow-disk", errors.ECODE_INVAL)
8082

    
8083
    if disk_addremove > 1:
8084
      raise errors.OpPrereqError("Only one disk add or remove operation"
8085
                                 " supported at a time", errors.ECODE_INVAL)
8086

    
8087
    if self.op.disks and self.op.disk_template is not None:
8088
      raise errors.OpPrereqError("Disk template conversion and other disk"
8089
                                 " changes not supported at the same time",
8090
                                 errors.ECODE_INVAL)
8091

    
8092
    if self.op.disk_template:
8093
      _CheckDiskTemplate(self.op.disk_template)
8094
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8095
          self.op.remote_node is None):
8096
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8097
                                   " one requires specifying a secondary node",
8098
                                   errors.ECODE_INVAL)
8099

    
8100
    # NIC validation
8101
    nic_addremove = 0
8102
    for nic_op, nic_dict in self.op.nics:
8103
      if nic_op == constants.DDM_REMOVE:
8104
        nic_addremove += 1
8105
        continue
8106
      elif nic_op == constants.DDM_ADD:
8107
        nic_addremove += 1
8108
      else:
8109
        if not isinstance(nic_op, int):
8110
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8111
        if not isinstance(nic_dict, dict):
8112
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8113
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8114

    
8115
      # nic_dict should be a dict
8116
      nic_ip = nic_dict.get('ip', None)
8117
      if nic_ip is not None:
8118
        if nic_ip.lower() == constants.VALUE_NONE:
8119
          nic_dict['ip'] = None
8120
        else:
8121
          if not utils.IsValidIP(nic_ip):
8122
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8123
                                       errors.ECODE_INVAL)
8124

    
8125
      nic_bridge = nic_dict.get('bridge', None)
8126
      nic_link = nic_dict.get('link', None)
8127
      if nic_bridge and nic_link:
8128
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8129
                                   " at the same time", errors.ECODE_INVAL)
8130
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8131
        nic_dict['bridge'] = None
8132
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8133
        nic_dict['link'] = None
8134

    
8135
      if nic_op == constants.DDM_ADD:
8136
        nic_mac = nic_dict.get('mac', None)
8137
        if nic_mac is None:
8138
          nic_dict['mac'] = constants.VALUE_AUTO
8139

    
8140
      if 'mac' in nic_dict:
8141
        nic_mac = nic_dict['mac']
8142
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8143
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8144

    
8145
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8146
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8147
                                     " modifying an existing nic",
8148
                                     errors.ECODE_INVAL)
8149

    
8150
    if nic_addremove > 1:
8151
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8152
                                 " supported at a time", errors.ECODE_INVAL)
8153

    
8154
  def ExpandNames(self):
    """Expand the instance name and prepare node-lock recalculation."""
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []
8158

    
8159
  def DeclareLocks(self, level):
    """Lock the instance's nodes, plus the new secondary for conversions."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()
    if self.op.disk_template and self.op.remote_node:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8165

    
8166
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    for be_key, env_key in ((constants.BE_MEMORY, 'memory'),
                            (constants.BE_VCPUS, 'vcpus')):
      if be_key in self.be_new:
        args[env_key] = self.be_new[be_key]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      hook_nics = []
      for idx, nic in enumerate(self.instance.nics):
        # per-nic values: submitted override wins over the current one
        override = nic_override.get(idx, {})
        ip = override.get('ip', nic.ip)
        mac = override.get('mac', nic.mac)
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        hook_nics.append((ip, mac, nicparams[constants.NIC_MODE],
                          nicparams[constants.NIC_LINK]))
      if constants.DDM_ADD in nic_override:
        add_dict = nic_override[constants.DDM_ADD]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        hook_nics.append((add_dict.get('ip', None), add_dict['mac'],
                          nicparams[constants.NIC_MODE],
                          nicparams[constants.NIC_LINK]))
      elif constants.DDM_REMOVE in nic_override:
        # the last nic is the one being removed
        hook_nics.pop()
      args['nics'] = hook_nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
8218

    
8219
  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    Applies C{update_dict} on top of C{old_params}: a value of
    C{constants.VALUE_DEFAULT} drops the key (reverting the parameter
    to its default), any other value overrides it.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    new_params = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        # reverting to default: drop the override if present
        new_params.pop(key, None)
      else:
        new_params[key] = val
    utils.ForceDictType(new_params, parameter_types)
    filled_params = objects.FillDict(default_values, new_params)
    return (new_params, filled_params)
8251

    
8252
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # disk template conversion checks
    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    # memory checks: unless forced, verify the instance can still start
    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      # FIX: the limit must be checked against the number of disks, not the
      # number of NICs (was len(instance.nics))
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          # FIX: the highest valid index is len - 1 (matching the NIC
          # check above), not len
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
8475

    
8476
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    The new DRBD layout (data/meta LVs on both primary and secondary
    node) is created, the existing plain LVs are renamed into place as
    the DRBD data devices, the DRBD devices are assembled on top and
    the initial sync is awaited.

    @param feedback_fn: callable used to report progress to the caller

    @raise errors.OpExecError: if renaming the original LVs fails or
        the disks do not reach a synchronized state

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    # FIX: user-visible message read "Creating aditional volumes..."
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice: on the primary only the
      # meta LV is missing (the data LV is the renamed original), while
      # the secondary needs both children created from scratch
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")
8525
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    The DRBD data LVs become the instance's plain disks; afterwards the
    DRBD devices on the former secondary node and the meta LVs on the
    primary node are removed (failures there are only logged).

    """
    inst = self.instance
    assert len(inst.secondary_nodes) == 1
    pnode = inst.primary_node
    snode = inst.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    drbd_disks = inst.disks
    # the data LV (first child) of each DRBD disk becomes the new disk
    plain_disks = [drbd.children[0] for drbd in drbd_disks]

    # propagate size and mode from the DRBD parents to the data LVs
    for drbd, plain in zip(drbd_disks, plain_disks):
      plain.size = drbd.size
      plain.mode = drbd.mode

    # switch the instance over to the plain layout and persist it
    inst.disks = plain_disks
    inst.disk_template = constants.DT_PLAIN
    self.cfg.Update(inst, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in drbd_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(drbd_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
8566
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    @param feedback_fn: callable used to report progress to the caller
    @return: a list of (parameter, new value) pairs describing every
        change that was applied

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk; CheckPrereq already ensured it is not
        # the only disk and that the instance is down
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            # best-effort removal: log and keep going
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          # file-based disks live next to the existing first disk
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # creation failures on individual nodes are not fatal
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk (disk_op is a numeric index here)
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      # disk template conversion; the disks must be down for this
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # deliberately bare: release the DRBD minors reserved in
        # CheckPrereq on any failure, then re-raise unchanged
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        # modify an existing nic (nic_op is a numeric index here)
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # persist all accumulated modifications in one config update
    self.cfg.Update(instance, feedback_fn)

    return result
8692
  # Supported disk template conversions: maps (current template,
  # requested template) to the method implementing the conversion;
  # looked up (and KeyError-prone for other pairs) in Exec above.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
8697
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    # read-only operation, so node locks can be shared
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      # no node list given: query every node in the cluster
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; a failed node maps to C{False} instead

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node, nres in rpcresult.items():
      if nres.fail_msg:
        result[node] = False
      else:
        result[node] = nres.payload

    return result
8739
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  The instance disks are snapshotted on the source (primary) node, the
  snapshots are copied to the target node where the export is
  finalized, and older exports of the same instance on other nodes are
  removed.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    # optional opcode parameter, falling back to the cluster default
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    # the destination must be reachable and able to accept new data
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: callable used to report progress to the caller
    @return: a tuple (finalize_ok, per_disk_results) where finalize_ok
        is a boolean for the finalize step and per_disk_results is a
        list with one boolean per disk export

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    # per-disk list of snapshot Disk objects (or False on failure)
    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, src_node))

          # result.payload will be a snapshot of an lvm leaf of the one we
          # passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, src_node, msg)
            snap_disks.append(False)
          else:
            disk_id = (vgname, result.payload)
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=disk_id, physical_id=disk_id,
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

      finally:
        # restart the instance as soon as snapshots are taken, even if
        # some of them failed
        if self.op.shutdown and instance.admin_up:
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
        if dev:
          # FIXME: pass debug from opcode to backend
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name,
                                                 idx, self.op.debug_level)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
          else:
            dresults.append(True)
          # the snapshot is no longer needed once copied
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
          if msg:
            self.LogWarning("Could not remove snapshot for disk/%d from node"
                            " %s: %s", idx, src_node, msg)
        else:
          # snapshotting this disk failed earlier
          dresults.append(False)

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
                                             snap_disks)
      fin_resu = True
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)
        fin_resu = False

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults
8937
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # RemoveExport needs every node locked; the instance itself stays
    # unlocked since nothing happens to it (this also allows removing
    # exports of an already-deleted instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    iname = self.cfg.ExpandInstanceName(self.op.instance_name)
    # when the instance is unknown, fall back to the literal name the
    # user passed in; that only matches an export if it was an FQDN
    fqdn_warn = not iname
    if fqdn_warn:
      iname = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node, nres in exportlist.items():
      msg = nres.fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if iname in nres.payload:
        found = True
        msg = self.rpc.call_export_remove(node, iname).fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", iname, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
8990
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      # node tags: resolve the node name and lock that node
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif kind == constants.TAG_INSTANCE:
      # instance tags: resolve the instance name and lock that instance
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves C{self.target} to the object whose tags are operated on.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
9021
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    # self.target was resolved by TagsLU.CheckPrereq
    return [tag for tag in self.target.GetTags()]
    
9035
class LUSearchTags(NoHooksLU):
9036
  """Searches the tags for a given pattern.
9037

9038
  """
9039
  _OP_REQP = ["pattern"]
9040
  REQ_BGL = False
9041

    
9042
  def ExpandNames(self):
9043
    self.needed_locks = {}
9044

    
9045
  def CheckPrereq(self):
9046
    """Check prerequisites.
9047

9048
    This checks the pattern passed for validity by compiling it.
9049

9050
    """
9051
    try:
9052
      self.re = re.compile(self.op.pattern)
9053
    except re.error, err:
9054
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9055
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9056

    
9057
  def Exec(self, feedback_fn):
9058
    """Returns the tag list.
9059

9060
    """
9061
    cfg = self.cfg
9062
    tgts = [("/cluster", cfg.GetClusterInfo())]
9063
    ilist = cfg.GetAllInstancesInfo().values()
9064
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9065
    nlist = cfg.GetAllNodesInfo().values()
9066
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9067
    results = []
9068
    for path, target in tgts:
9069
      for tag in target.GetTags():
9070
        if self.re.search(tag):
9071
          results.append((path, tag))
9072
    return results
9073

    
9074

    
9075
class LUAddTags(TagsLU):
9076
  """Sets a tag on a given object.
9077

9078
  """
9079
  _OP_REQP = ["kind", "name", "tags"]
9080
  REQ_BGL = False
9081

    
9082
  def CheckPrereq(self):
9083
    """Check prerequisites.
9084

9085
    This checks the type and length of the tag name and value.
9086

9087
    """
9088
    TagsLU.CheckPrereq(self)
9089
    for tag in self.op.tags:
9090
      objects.TaggableObject.ValidateTag(tag)
9091

    
9092
  def Exec(self, feedback_fn):
9093
    """Sets the tag.
9094

9095
    """
9096
    try:
9097
      for tag in self.op.tags:
9098
        self.target.AddTag(tag)
9099
    except errors.TagError, err:
9100
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9101
    self.cfg.Update(self.target, feedback_fn)
9102

    
9103

    
9104
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    # every requested tag must currently be present on the target
    missing = frozenset(self.op.tags) - self.target.GetTags()
    if missing:
      diff_names = sorted("'%s'" % tag for tag in missing)
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for old_tag in self.op.tags:
      self.target.RemoveTag(old_tag)
    # persist the modified target object
    self.cfg.Update(self.target, feedback_fn)
9138
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      rpc_result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in rpc_result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
9180
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the per-mode ``_*_KEYS`` class
      attributes are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  # Required keyword arguments for each mode; __init__ validates kwargs
  # against exactly one of these lists.
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    """Initialize the allocator and build the full input payload.

    @param cfg: cluster configuration accessor
    @param rpc: RPC runner used to query live node/instance data
    @param mode: one of the constants.IALLOCATOR_MODE_* values; selects
        both the set of required kwargs and the request-builder method
    @param kwargs: mode-specific input fields, validated against the
        matching _*_KEYS list (both unknown and missing keys raise
        L{errors.ProgrammerError})

    """
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    # reject kwargs outside the mode's keyset, then store the valid ones
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    # every key of the mode's keyset must have been supplied
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    Fills self.in_data with a dict holding cluster-, node- and
    instance-level information gathered from the configuration and from
    live RPC queries (node_info / all_instances_info).

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    # pair each instance with its backend parameters filled from defaults
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    # pick the hypervisor whose node view we query; __init__ guarantees
    # self.mode is one of these three, so hypervisor_name is always set
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      # dynamic (RPC) data is only added for usable nodes; offline or
      # drained nodes keep only the static entry above
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        # sanity-check the RPC payload: all required keys present and int
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            # charge the configured-but-unused memory against the node's
            # free memory, so the allocator sees the committed amount
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        # merge cluster-default NIC parameters with per-NIC overrides
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          # legacy field: in bridged mode the link doubles as the bridge
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    @return: the "request" dict for an allocation query

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    # mirrored templates need a primary and a secondary node
    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    @return: the "request" dict for a relocation query
    @raise errors.OpPrereqError: if the instance is not mirrored or does
        not have exactly one secondary node

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    # relocation only makes sense for instances with a secondary node
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    @return: the "request" dict for a multi-node evacuation query

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    Computes the generic cluster data, attaches the mode-specific
    request produced by C{fn}, and serializes everything into
    self.in_text for the external allocator script.

    @param fn: the per-mode request builder (one of the _Add* methods)

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    @param name: the name of the allocator script to execute
    @param validate: whether to parse and validate the script output
        via L{_ValidateResult}
    @param call_fn: override for the RPC call used to run the script
        (defaults to running it on the master node)

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    @raise errors.OpExecError: if the output is not valid JSON, not a
        dict, is missing a required key, or has a non-list "result"

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatiblity in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    # copy the mandatory keys onto self (self.success, self.info,
    # self.result) for easy access by callers
    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      # allocation mode: all instance-creation attributes must be present
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # the target name must not resolve to an existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      # each NIC must be a dict with at least mac/ip/bridge keys
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # each disk must be a dict with an integer size and an 'r'/'w' mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        # default to the cluster's hypervisor when none was given
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      # relocation mode: the instance must exist; remember its current
      # secondary nodes as the relocation sources
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # direction "out" additionally needs the allocator script name
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    Builds an IAllocator for the requested mode; for direction "in"
    returns the generated input text, for direction "out" runs the
    named allocator script and returns its raw output.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # validate=False: the test LU returns the raw script output
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result