
root / lib / cmdlib.py @ 1338f2b4


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
import os
30
import os.path
31
import time
32
import re
33
import platform
34
import logging
35
import copy
36
import OpenSSL
37

    
38
from ganeti import ssh
39
from ganeti import utils
40
from ganeti import errors
41
from ganeti import hypervisor
42
from ganeti import locking
43
from ganeti import constants
44
from ganeti import objects
45
from ganeti import serializer
46
from ganeti import ssconf
47
from ganeti import uidpool
48

    
49

    
50
class LogicalUnit(object):
51
  """Logical Unit base class.
52

53
  Subclasses must follow these rules:
54
    - implement ExpandNames
55
    - implement CheckPrereq (except when tasklets are used)
56
    - implement Exec (except when tasklets are used)
57
    - implement BuildHooksEnv
58
    - redefine HPATH and HTYPE
59
    - optionally redefine their run requirements:
60
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
61

62
  Note that all commands require root permissions.
63

64
  @ivar dry_run_result: the value (if any) that will be returned to the caller
65
      in dry-run mode (signalled by opcode dry_run parameter)
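
  A minimal, illustrative subclass sketch (the names used here are
  hypothetical and only meant to make the rules above concrete)::

    class LUExampleNoop(LogicalUnit):
      HPATH = "example-noop"
      HTYPE = constants.HTYPE_CLUSTER
      _OP_REQP = []

      def ExpandNames(self):
        self.needed_locks = {}

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        return True

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}, [], []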
66

67
  """
68
  HPATH = None
69
  HTYPE = None
70
  _OP_REQP = []
71
  REQ_BGL = True
72

    
73
  def __init__(self, processor, op, context, rpc):
74
    """Constructor for LogicalUnit.
75

76
    This needs to be overridden in derived classes in order to check op
77
    validity.
78

79
    """
80
    self.proc = processor
81
    self.op = op
82
    self.cfg = context.cfg
83
    self.context = context
84
    self.rpc = rpc
85
    # Dicts used to declare locking needs to mcpu
86
    self.needed_locks = None
87
    self.acquired_locks = {}
88
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
89
    self.add_locks = {}
90
    self.remove_locks = {}
91
    # Used to force good behavior when calling helper functions
92
    self.recalculate_locks = {}
93
    self.__ssh = None
94
    # logging
95
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
96
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
97
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
98
    # support for dry-run
99
    self.dry_run_result = None
100
    # support for generic debug attribute
101
    if (not hasattr(self.op, "debug_level") or
102
        not isinstance(self.op.debug_level, int)):
103
      self.op.debug_level = 0
104

    
105
    # Tasklets
106
    self.tasklets = None
107

    
108
    for attr_name in self._OP_REQP:
109
      attr_val = getattr(op, attr_name, None)
110
      if attr_val is None:
111
        raise errors.OpPrereqError("Required parameter '%s' missing" %
112
                                   attr_name, errors.ECODE_INVAL)
113

    
114
    self.CheckArguments()
115

    
116
  def __GetSSH(self):
117
    """Returns the SshRunner object
118

119
    """
120
    if not self.__ssh:
121
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
122
    return self.__ssh
123

    
124
  ssh = property(fget=__GetSSH)
125

    
126
  def CheckArguments(self):
127
    """Check syntactic validity for the opcode arguments.
128

129
    This method is for doing a simple syntactic check and ensure
130
    validity of opcode parameters, without any cluster-related
131
    checks. While the same can be accomplished in ExpandNames and/or
132
    CheckPrereq, doing these separately is better because:
133

134
      - ExpandNames is left as purely a lock-related function
135
      - CheckPrereq is run after we have acquired locks (and possibly
136
        waited for them)
137

138
    The function is allowed to change the self.op attribute so that
139
    later methods no longer need to worry about missing parameters.
140

141
    """
142
    pass
143

    
144
  def ExpandNames(self):
145
    """Expand names for this LU.
146

147
    This method is called before starting to execute the opcode, and it should
148
    update all the parameters of the opcode to their canonical form (e.g. a
149
    short node name must be fully expanded after this method has successfully
150
    completed). This way locking, hooks, logging, etc. can work correctly.
151

152
    LUs which implement this method must also populate the self.needed_locks
153
    member, as a dict with lock levels as keys, and a list of needed lock names
154
    as values. Rules:
155

156
      - use an empty dict if you don't need any lock
157
      - if you don't need any lock at a particular level omit that level
158
      - don't put anything for the BGL level
159
      - if you want all locks at a level use locking.ALL_SET as a value
160

161
    If you need to share locks (rather than acquire them exclusively) at one
162
    level you can modify self.share_locks, setting a true value (usually 1) for
163
    that level. By default locks are not shared.
164

165
    This function can also define a list of tasklets, which then will be
166
    executed in order instead of the usual LU-level CheckPrereq and Exec
167
    functions, if those are not defined by the LU.
168

169
    Examples::
170

171
      # Acquire all nodes and one instance
172
      self.needed_locks = {
173
        locking.LEVEL_NODE: locking.ALL_SET,
174
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
175
      }
176
      # Acquire just two nodes
177
      self.needed_locks = {
178
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
179
      }
180
      # Acquire no locks
181
      self.needed_locks = {} # No, you can't leave it to the default value None
182

183
    """
184
    # The implementation of this method is mandatory only if the new LU is
185
    # concurrent, so that old LUs don't need to be changed all at the same
186
    # time.
187
    if self.REQ_BGL:
188
      self.needed_locks = {} # Exclusive LUs don't need locks.
189
    else:
190
      raise NotImplementedError
191

    
192
  def DeclareLocks(self, level):
193
    """Declare LU locking needs for a level
194

195
    While most LUs can just declare their locking needs at ExpandNames time,
196
    sometimes there's the need to calculate some locks after having acquired
197
    the ones before. This function is called just before acquiring locks at a
198
    particular level, but after acquiring the ones at lower levels, and permits
199
    such calculations. It can be used to modify self.needed_locks, and by
200
    default it does nothing.
201

202
    This function is only called if you have something already set in
203
    self.needed_locks for the level.
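
    A typical implementation, shown only as an illustrative sketch (it
    assumes the LU locked its instances via _ExpandAndLockInstance and set
    self.recalculate_locks[locking.LEVEL_NODE]), looks like::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()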
204

205
    @param level: Locking level which is going to be locked
206
    @type level: member of ganeti.locking.LEVELS
207

208
    """
209

    
210
  def CheckPrereq(self):
211
    """Check prerequisites for this LU.
212

213
    This method should check that the prerequisites for the execution
214
    of this LU are fulfilled. It can do internode communication, but
215
    it should be idempotent - no cluster or system changes are
216
    allowed.
217

218
    The method should raise errors.OpPrereqError in case something is
219
    not fulfilled. Its return value is ignored.
220

221
    This method should also update all the parameters of the opcode to
222
    their canonical form if it hasn't been done by ExpandNames before.
223

224
    """
225
    if self.tasklets is not None:
226
      for (idx, tl) in enumerate(self.tasklets):
227
        logging.debug("Checking prerequisites for tasklet %s/%s",
228
                      idx + 1, len(self.tasklets))
229
        tl.CheckPrereq()
230
    else:
231
      raise NotImplementedError
232

    
233
  def Exec(self, feedback_fn):
234
    """Execute the LU.
235

236
    This method should implement the actual work. It should raise
237
    errors.OpExecError for failures that are somewhat dealt with in
238
    code, or expected.
239

240
    """
241
    if self.tasklets is not None:
242
      for (idx, tl) in enumerate(self.tasklets):
243
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
244
        tl.Exec(feedback_fn)
245
    else:
246
      raise NotImplementedError
247

    
248
  def BuildHooksEnv(self):
249
    """Build hooks environment for this LU.
250

251
    This method should return a three-element tuple consisting of: a dict
252
    containing the environment that will be used for running the
253
    specific hook for this LU, a list of node names on which the hook
254
    should run before the execution, and a list of node names on which
255
    the hook should run after the execution.
256

257
    The keys of the dict must not be prefixed with 'GANETI_', as this will
258
    be handled in the hooks runner. Also note additional keys will be
259
    added by the hooks runner. If the LU doesn't define any
260
    environment, an empty dict (and not None) should be returned.
261

262
    An empty set of nodes should be returned as an empty list (and not None).
263

264
    Note that if the HPATH for a LU class is None, this function will
265
    not be called.
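
    As an illustration only (a hypothetical LU acting on a single node),
    the return value could look like::

      env = {"OP_TARGET": self.op.node_name}
      return env, [self.cfg.GetMasterNode()], [self.op.node_name]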
266

267
    """
268
    raise NotImplementedError
269

    
270
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
271
    """Notify the LU about the results of its hooks.
272

273
    This method is called every time a hooks phase is executed, and notifies
274
    the Logical Unit about the hooks' result. The LU can then use it to alter
275
    its result based on the hooks.  By default the method does nothing and the
276
    previous result is passed back unchanged, but any LU can override it if it
277
    wants to use the local cluster hook-scripts somehow.
278

279
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
280
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
281
    @param hook_results: the results of the multi-node hooks rpc call
282
    @param feedback_fn: function used to send feedback back to the caller
283
    @param lu_result: the previous Exec result this LU had, or None
284
        in the PRE phase
285
    @return: the new Exec result, based on the previous result
286
        and hook results
287

288
    """
289
    # API must be kept, thus we ignore the unused-argument and
290
    # could-be-a-function warnings
291
    # pylint: disable-msg=W0613,R0201
292
    return lu_result
293

    
294
  def _ExpandAndLockInstance(self):
295
    """Helper function to expand and lock an instance.
296

297
    Many LUs that work on an instance take its name in self.op.instance_name
298
    and need to expand it and then declare the expanded name for locking. This
299
    function does it, and then updates self.op.instance_name to the expanded
300
    name. It also initializes needed_locks as a dict, if this hasn't been done
301
    before.
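
    Typical use from ExpandNames (an illustrative sketch, assuming the LU
    recalculates its node locks in DeclareLocks)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()
        self.needed_locks[locking.LEVEL_NODE] = []
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE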
302

303
    """
304
    if self.needed_locks is None:
305
      self.needed_locks = {}
306
    else:
307
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
308
        "_ExpandAndLockInstance called with instance-level locks set"
309
    self.op.instance_name = _ExpandInstanceName(self.cfg,
310
                                                self.op.instance_name)
311
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
312

    
313
  def _LockInstancesNodes(self, primary_only=False):
314
    """Helper function to declare instances' nodes for locking.
315

316
    This function should be called after locking one or more instances to lock
317
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
318
    with all primary or secondary nodes for instances already locked and
319
    present in self.needed_locks[locking.LEVEL_INSTANCE].
320

321
    It should be called from DeclareLocks, and for safety only works if
322
    self.recalculate_locks[locking.LEVEL_NODE] is set.
323

324
    In the future it may grow parameters to just lock some instances' nodes, or
325
    to just lock primary or secondary nodes, if needed.
326

327
    It should be called in DeclareLocks in a way similar to::
328

329
      if level == locking.LEVEL_NODE:
330
        self._LockInstancesNodes()
331

332
    @type primary_only: boolean
333
    @param primary_only: only lock primary nodes of locked instances
334

335
    """
336
    assert locking.LEVEL_NODE in self.recalculate_locks, \
337
      "_LockInstancesNodes helper function called with no nodes to recalculate"
338

    
339
    # TODO: check if we've really been called with the instance locks held
340

    
341
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
342
    # future we might want to have different behaviors depending on the value
343
    # of self.recalculate_locks[locking.LEVEL_NODE]
344
    wanted_nodes = []
345
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
346
      instance = self.context.cfg.GetInstanceInfo(instance_name)
347
      wanted_nodes.append(instance.primary_node)
348
      if not primary_only:
349
        wanted_nodes.extend(instance.secondary_nodes)
350

    
351
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
352
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
353
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
354
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
355

    
356
    del self.recalculate_locks[locking.LEVEL_NODE]
357

    
358

    
359
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
360
  """Simple LU which runs no hooks.
361

362
  This LU is intended as a parent for other LogicalUnits which will
363
  run no hooks, in order to reduce duplicate code.
364

365
  """
366
  HPATH = None
367
  HTYPE = None
368

    
369
  def BuildHooksEnv(self):
370
    """Empty BuildHooksEnv for NoHooksLu.
371

372
    This just raises an error.
373

374
    """
375
    assert False, "BuildHooksEnv called for NoHooksLUs"
376

    
377

    
378
class Tasklet:
379
  """Tasklet base class.
380

381
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
382
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
383
  tasklets know nothing about locks.
384

385
  Subclasses must follow these rules:
386
    - Implement CheckPrereq
387
    - Implement Exec
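
  As an illustrative sketch (the tasklet name is hypothetical), an LU can
  delegate its work to tasklets from its ExpandNames method::

    class _ExampleTasklet(Tasklet):
      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("the example tasklet does nothing")

    # in the owning LU's ExpandNames:
    #   self.tasklets = [_ExampleTasklet(self)]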
388

389
  """
390
  def __init__(self, lu):
391
    self.lu = lu
392

    
393
    # Shortcuts
394
    self.cfg = lu.cfg
395
    self.rpc = lu.rpc
396

    
397
  def CheckPrereq(self):
398
    """Check prerequisites for this tasklets.
399

400
    This method should check whether the prerequisites for the execution of
401
    this tasklet are fulfilled. It can do internode communication, but it
402
    should be idempotent - no cluster or system changes are allowed.
403

404
    The method should raise errors.OpPrereqError in case something is not
405
    fulfilled. Its return value is ignored.
406

407
    This method should also update all parameters to their canonical form if it
408
    hasn't been done before.
409

410
    """
411
    raise NotImplementedError
412

    
413
  def Exec(self, feedback_fn):
414
    """Execute the tasklet.
415

416
    This method should implement the actual work. It should raise
417
    errors.OpExecError for failures that are somewhat dealt with in code, or
418
    expected.
419

420
    """
421
    raise NotImplementedError
422

    
423

    
424
def _GetWantedNodes(lu, nodes):
425
  """Returns list of checked and expanded node names.
426

427
  @type lu: L{LogicalUnit}
428
  @param lu: the logical unit on whose behalf we execute
429
  @type nodes: list
430
  @param nodes: list of node names or None for all nodes
431
  @rtype: list
432
  @return: the list of nodes, sorted
433
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
434

435
  """
436
  if not isinstance(nodes, list):
437
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
438
                               errors.ECODE_INVAL)
439

    
440
  if not nodes:
441
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
442
      " non-empty list of nodes whose name is to be expanded.")
443

    
444
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
445
  return utils.NiceSort(wanted)
446

    
447

    
448
def _GetWantedInstances(lu, instances):
449
  """Returns list of checked and expanded instance names.
450

451
  @type lu: L{LogicalUnit}
452
  @param lu: the logical unit on whose behalf we execute
453
  @type instances: list
454
  @param instances: list of instance names or None for all instances
455
  @rtype: list
456
  @return: the list of instances, sorted
457
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
458
  @raise errors.OpPrereqError: if any of the passed instances is not found
459

460
  """
461
  if not isinstance(instances, list):
462
    raise errors.OpPrereqError("Invalid argument type 'instances'",
463
                               errors.ECODE_INVAL)
464

    
465
  if instances:
466
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
467
  else:
468
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
469
  return wanted
470

    
471

    
472
def _CheckOutputFields(static, dynamic, selected):
473
  """Checks whether all selected fields are valid.
474

475
  @type static: L{utils.FieldSet}
476
  @param static: static fields set
477
  @type dynamic: L{utils.FieldSet}
478
  @param dynamic: dynamic fields set
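
  Example call (illustrative field names only)::

    _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
                       dynamic=utils.FieldSet("dtotal", "dfree"),
                       selected=self.op.output_fields)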
479

480
  """
481
  f = utils.FieldSet()
482
  f.Extend(static)
483
  f.Extend(dynamic)
484

    
485
  delta = f.NonMatching(selected)
486
  if delta:
487
    raise errors.OpPrereqError("Unknown output fields selected: %s"
488
                               % ",".join(delta), errors.ECODE_INVAL)
489

    
490

    
491
def _CheckBooleanOpField(op, name):
492
  """Validates boolean opcode parameters.
493

494
  This will ensure that an opcode parameter is either a boolean value,
495
  or None (but that it always exists).
496

497
  """
498
  val = getattr(op, name, None)
499
  if not (val is None or isinstance(val, bool)):
500
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
501
                               (name, str(val)), errors.ECODE_INVAL)
502
  setattr(op, name, val)
503

    
504

    
505
def _CheckGlobalHvParams(params):
506
  """Validates that given hypervisor params are not global ones.
507

508
  This will ensure that instances don't get customized versions of
509
  global params.
510

511
  """
512
  used_globals = constants.HVC_GLOBALS.intersection(params)
513
  if used_globals:
514
    msg = ("The following hypervisor parameters are global and cannot"
515
           " be customized at instance level, please modify them at"
516
           " cluster level: %s" % utils.CommaJoin(used_globals))
517
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
518

    
519

    
520
def _CheckNodeOnline(lu, node):
521
  """Ensure that a given node is online.
522

523
  @param lu: the LU on behalf of which we make the check
524
  @param node: the node to check
525
  @raise errors.OpPrereqError: if the node is offline
526

527
  """
528
  if lu.cfg.GetNodeInfo(node).offline:
529
    raise errors.OpPrereqError("Can't use offline node %s" % node,
530
                               errors.ECODE_INVAL)
531

    
532

    
533
def _CheckNodeNotDrained(lu, node):
534
  """Ensure that a given node is not drained.
535

536
  @param lu: the LU on behalf of which we make the check
537
  @param node: the node to check
538
  @raise errors.OpPrereqError: if the node is drained
539

540
  """
541
  if lu.cfg.GetNodeInfo(node).drained:
542
    raise errors.OpPrereqError("Can't use drained node %s" % node,
543
                               errors.ECODE_INVAL)
544

    
545

    
546
def _CheckNodeHasOS(lu, node, os_name, force_variant):
547
  """Ensure that a node supports a given OS.
548

549
  @param lu: the LU on behalf of which we make the check
550
  @param node: the node to check
551
  @param os_name: the OS to query about
552
  @param force_variant: whether to ignore variant errors
553
  @raise errors.OpPrereqError: if the node does not support the OS
554

555
  """
556
  result = lu.rpc.call_os_get(node, os_name)
557
  result.Raise("OS '%s' not in supported OS list for node %s" %
558
               (os_name, node),
559
               prereq=True, ecode=errors.ECODE_INVAL)
560
  if not force_variant:
561
    _CheckOSVariant(result.payload, os_name)
562

    
563

    
564
def _RequireFileStorage():
565
  """Checks that file storage is enabled.
566

567
  @raise errors.OpPrereqError: when file storage is disabled
568

569
  """
570
  if not constants.ENABLE_FILE_STORAGE:
571
    raise errors.OpPrereqError("File storage disabled at configure time",
572
                               errors.ECODE_INVAL)
573

    
574

    
575
def _CheckDiskTemplate(template):
576
  """Ensure a given disk template is valid.
577

578
  """
579
  if template not in constants.DISK_TEMPLATES:
580
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
581
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
582
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
583
  if template == constants.DT_FILE:
584
    _RequireFileStorage()
585

    
586

    
587
def _CheckStorageType(storage_type):
588
  """Ensure a given storage type is valid.
589

590
  """
591
  if storage_type not in constants.VALID_STORAGE_TYPES:
592
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
593
                               errors.ECODE_INVAL)
594
  if storage_type == constants.ST_FILE:
595
    _RequireFileStorage()
596

    
597

    
598

    
599
def _CheckInstanceDown(lu, instance, reason):
600
  """Ensure that an instance is not running."""
601
  if instance.admin_up:
602
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
603
                               (instance.name, reason), errors.ECODE_STATE)
604

    
605
  pnode = instance.primary_node
606
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
607
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
608
              prereq=True, ecode=errors.ECODE_ENVIRON)
609

    
610
  if instance.name in ins_l.payload:
611
    raise errors.OpPrereqError("Instance %s is running, %s" %
612
                               (instance.name, reason), errors.ECODE_STATE)
613

    
614

    
615
def _ExpandItemName(fn, name, kind):
616
  """Expand an item name.
617

618
  @param fn: the function to use for expansion
619
  @param name: requested item name
620
  @param kind: text description ('Node' or 'Instance')
621
  @return: the resolved (full) name
622
  @raise errors.OpPrereqError: if the item is not found
623

624
  """
625
  full_name = fn(name)
626
  if full_name is None:
627
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
628
                               errors.ECODE_NOENT)
629
  return full_name
630

    
631

    
632
def _ExpandNodeName(cfg, name):
633
  """Wrapper over L{_ExpandItemName} for nodes."""
634
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
635

    
636

    
637
def _ExpandInstanceName(cfg, name):
638
  """Wrapper over L{_ExpandItemName} for instance."""
639
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
640

    
641

    
642
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
643
                          memory, vcpus, nics, disk_template, disks,
644
                          bep, hvp, hypervisor_name):
645
  """Builds instance related env variables for hooks
646

647
  This builds the hook environment from individual variables.
648

649
  @type name: string
650
  @param name: the name of the instance
651
  @type primary_node: string
652
  @param primary_node: the name of the instance's primary node
653
  @type secondary_nodes: list
654
  @param secondary_nodes: list of secondary nodes as strings
655
  @type os_type: string
656
  @param os_type: the name of the instance's OS
657
  @type status: boolean
658
  @param status: the should_run status of the instance
659
  @type memory: string
660
  @param memory: the memory size of the instance
661
  @type vcpus: string
662
  @param vcpus: the count of VCPUs the instance has
663
  @type nics: list
664
  @param nics: list of tuples (ip, mac, mode, link) representing
665
      the NICs the instance has
666
  @type disk_template: string
667
  @param disk_template: the disk template of the instance
668
  @type disks: list
669
  @param disks: the list of (size, mode) pairs
670
  @type bep: dict
671
  @param bep: the backend parameters for the instance
672
  @type hvp: dict
673
  @param hvp: the hypervisor parameters for the instance
674
  @type hypervisor_name: string
675
  @param hypervisor_name: the hypervisor for the instance
676
  @rtype: dict
677
  @return: the hook environment for this instance
678

679
  """
680
  if status:
681
    str_status = "up"
682
  else:
683
    str_status = "down"
684
  env = {
685
    "OP_TARGET": name,
686
    "INSTANCE_NAME": name,
687
    "INSTANCE_PRIMARY": primary_node,
688
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
689
    "INSTANCE_OS_TYPE": os_type,
690
    "INSTANCE_STATUS": str_status,
691
    "INSTANCE_MEMORY": memory,
692
    "INSTANCE_VCPUS": vcpus,
693
    "INSTANCE_DISK_TEMPLATE": disk_template,
694
    "INSTANCE_HYPERVISOR": hypervisor_name,
695
  }
696

    
697
  if nics:
698
    nic_count = len(nics)
699
    for idx, (ip, mac, mode, link) in enumerate(nics):
700
      if ip is None:
701
        ip = ""
702
      env["INSTANCE_NIC%d_IP" % idx] = ip
703
      env["INSTANCE_NIC%d_MAC" % idx] = mac
704
      env["INSTANCE_NIC%d_MODE" % idx] = mode
705
      env["INSTANCE_NIC%d_LINK" % idx] = link
706
      if mode == constants.NIC_MODE_BRIDGED:
707
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
708
  else:
709
    nic_count = 0
710

    
711
  env["INSTANCE_NIC_COUNT"] = nic_count
712

    
713
  if disks:
714
    disk_count = len(disks)
715
    for idx, (size, mode) in enumerate(disks):
716
      env["INSTANCE_DISK%d_SIZE" % idx] = size
717
      env["INSTANCE_DISK%d_MODE" % idx] = mode
718
  else:
719
    disk_count = 0
720

    
721
  env["INSTANCE_DISK_COUNT"] = disk_count
722

    
723
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
724
    for key, value in source.items():
725
      env["INSTANCE_%s_%s" % (kind, key)] = value
726

    
727
  return env
728

    
729

    
730
def _NICListToTuple(lu, nics):
731
  """Build a list of nic information tuples.
732

733
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
734
  value in LUQueryInstanceData.
735

736
  @type lu:  L{LogicalUnit}
737
  @param lu: the logical unit on whose behalf we execute
738
  @type nics: list of L{objects.NIC}
739
  @param nics: list of nics to convert to hooks tuples
740

741
  """
742
  hooks_nics = []
743
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
744
  for nic in nics:
745
    ip = nic.ip
746
    mac = nic.mac
747
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
748
    mode = filled_params[constants.NIC_MODE]
749
    link = filled_params[constants.NIC_LINK]
750
    hooks_nics.append((ip, mac, mode, link))
751
  return hooks_nics
752

    
753

    
754
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
755
  """Builds instance related env variables for hooks from an object.
756

757
  @type lu: L{LogicalUnit}
758
  @param lu: the logical unit on whose behalf we execute
759
  @type instance: L{objects.Instance}
760
  @param instance: the instance for which we should build the
761
      environment
762
  @type override: dict
763
  @param override: dictionary with key/values that will override
764
      our values
765
  @rtype: dict
766
  @return: the hook environment dictionary
767

768
  """
769
  cluster = lu.cfg.GetClusterInfo()
770
  bep = cluster.FillBE(instance)
771
  hvp = cluster.FillHV(instance)
772
  args = {
773
    'name': instance.name,
774
    'primary_node': instance.primary_node,
775
    'secondary_nodes': instance.secondary_nodes,
776
    'os_type': instance.os,
777
    'status': instance.admin_up,
778
    'memory': bep[constants.BE_MEMORY],
779
    'vcpus': bep[constants.BE_VCPUS],
780
    'nics': _NICListToTuple(lu, instance.nics),
781
    'disk_template': instance.disk_template,
782
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
783
    'bep': bep,
784
    'hvp': hvp,
785
    'hypervisor_name': instance.hypervisor,
786
  }
787
  if override:
788
    args.update(override)
789
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
790

    
791

    
792
def _AdjustCandidatePool(lu, exceptions):
793
  """Adjust the candidate pool after node operations.
794

795
  """
796
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
797
  if mod_list:
798
    lu.LogInfo("Promoted nodes to master candidate role: %s",
799
               utils.CommaJoin(node.name for node in mod_list))
800
    for name in mod_list:
801
      lu.context.ReaddNode(name)
802
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
803
  if mc_now > mc_max:
804
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
805
               (mc_now, mc_max))
806

    
807

    
808
def _DecideSelfPromotion(lu, exceptions=None):
809
  """Decide whether I should promote myself as a master candidate.
810

811
  """
812
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
813
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
814
  # the new node will increase mc_max by one, so:
815
  mc_should = min(mc_should + 1, cp_size)
816
  return mc_now < mc_should
817

    
818

    
819
def _CheckNicsBridgesExist(lu, target_nics, target_node,
820
                               profile=constants.PP_DEFAULT):
821
  """Check that the brigdes needed by a list of nics exist.
822

823
  """
824
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
825
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
826
                for nic in target_nics]
827
  brlist = [params[constants.NIC_LINK] for params in paramslist
828
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
829
  if brlist:
830
    result = lu.rpc.call_bridges_exist(target_node, brlist)
831
    result.Raise("Error checking bridges on destination node '%s'" %
832
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
833

    
834

    
835
def _CheckInstanceBridgesExist(lu, instance, node=None):
836
  """Check that the brigdes needed by an instance exist.
837

838
  """
839
  if node is None:
840
    node = instance.primary_node
841
  _CheckNicsBridgesExist(lu, instance.nics, node)
842

    
843

    
844
def _CheckOSVariant(os_obj, name):
845
  """Check whether an OS name conforms to the os variants specification.
846

847
  @type os_obj: L{objects.OS}
848
  @param os_obj: OS object to check
849
  @type name: string
850
  @param name: OS name passed by the user, to check for validity
851

852
  """
853
  if not os_obj.supported_variants:
854
    return
855
  try:
856
    variant = name.split("+", 1)[1]
857
  except IndexError:
858
    raise errors.OpPrereqError("OS name must include a variant",
859
                               errors.ECODE_INVAL)
860

    
861
  if variant not in os_obj.supported_variants:
862
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
863

    
864

    
865
def _GetNodeInstancesInner(cfg, fn):
866
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
867

    
868

    
869
def _GetNodeInstances(cfg, node_name):
870
  """Returns a list of all primary and secondary instances on a node.
871

872
  """
873

    
874
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
875

    
876

    
877
def _GetNodePrimaryInstances(cfg, node_name):
878
  """Returns primary instances on a node.
879

880
  """
881
  return _GetNodeInstancesInner(cfg,
882
                                lambda inst: node_name == inst.primary_node)
883

    
884

    
885
def _GetNodeSecondaryInstances(cfg, node_name):
886
  """Returns secondary instances on a node.
887

888
  """
889
  return _GetNodeInstancesInner(cfg,
890
                                lambda inst: node_name in inst.secondary_nodes)
891

    
892

    
893
def _GetStorageTypeArgs(cfg, storage_type):
894
  """Returns the arguments for a storage type.
895

896
  """
897
  # Special case for file storage
898
  if storage_type == constants.ST_FILE:
899
    # storage.FileStorage wants a list of storage directories
900
    return [[cfg.GetFileStorageDir()]]
901

    
902
  return []
903

    
904

    
905
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
906
  faulty = []
907

    
908
  for dev in instance.disks:
909
    cfg.SetDiskID(dev, node_name)
910

    
911
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
912
  result.Raise("Failed to get disk status from node %s" % node_name,
913
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
914

    
915
  for idx, bdev_status in enumerate(result.payload):
916
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
917
      faulty.append(idx)
918

    
919
  return faulty
920

    
921

    
922
def _FormatTimestamp(secs):
923
  """Formats a Unix timestamp with the local timezone.
924

925
  """
926
  return time.strftime("%F %T %Z", time.gmtime(secs))
927

    
928

    
929
class LUPostInitCluster(LogicalUnit):
930
  """Logical unit for running hooks after cluster initialization.
931

932
  """
933
  HPATH = "cluster-init"
934
  HTYPE = constants.HTYPE_CLUSTER
935
  _OP_REQP = []
936

    
937
  def BuildHooksEnv(self):
938
    """Build hooks env.
939

940
    """
941
    env = {"OP_TARGET": self.cfg.GetClusterName()}
942
    mn = self.cfg.GetMasterNode()
943
    return env, [], [mn]
944

    
945
  def CheckPrereq(self):
946
    """No prerequisites to check.
947

948
    """
949
    return True
950

    
951
  def Exec(self, feedback_fn):
952
    """Nothing to do.
953

954
    """
955
    return True
956

    
957

    
958
class LUDestroyCluster(LogicalUnit):
959
  """Logical unit for destroying the cluster.
960

961
  """
962
  HPATH = "cluster-destroy"
963
  HTYPE = constants.HTYPE_CLUSTER
964
  _OP_REQP = []
965

    
966
  def BuildHooksEnv(self):
967
    """Build hooks env.
968

969
    """
970
    env = {"OP_TARGET": self.cfg.GetClusterName()}
971
    return env, [], []
972

    
973
  def CheckPrereq(self):
974
    """Check prerequisites.
975

976
    This checks whether the cluster is empty.
977

978
    Any errors are signaled by raising errors.OpPrereqError.
979

980
    """
981
    master = self.cfg.GetMasterNode()
982

    
983
    nodelist = self.cfg.GetNodeList()
984
    if len(nodelist) != 1 or nodelist[0] != master:
985
      raise errors.OpPrereqError("There are still %d node(s) in"
986
                                 " this cluster." % (len(nodelist) - 1),
987
                                 errors.ECODE_INVAL)
988
    instancelist = self.cfg.GetInstanceList()
989
    if instancelist:
990
      raise errors.OpPrereqError("There are still %d instance(s) in"
991
                                 " this cluster." % len(instancelist),
992
                                 errors.ECODE_INVAL)
993

    
994
  def Exec(self, feedback_fn):
995
    """Destroys the cluster.
996

997
    """
998
    master = self.cfg.GetMasterNode()
999
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1000

    
1001
    # Run post hooks on master node before it's removed
1002
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1003
    try:
1004
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1005
    except:
1006
      # pylint: disable-msg=W0702
1007
      self.LogWarning("Errors occurred running hooks on %s" % master)
1008

    
1009
    result = self.rpc.call_node_stop_master(master, False)
1010
    result.Raise("Could not disable the master role")
1011

    
1012
    if modify_ssh_setup:
1013
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1014
      utils.CreateBackup(priv_key)
1015
      utils.CreateBackup(pub_key)
1016

    
1017
    return master
1018

    
1019

    
1020
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
1021
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
1022
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1023
  """Verifies certificate details for LUVerifyCluster.
1024

1025
  """
1026
  if expired:
1027
    msg = "Certificate %s is expired" % filename
1028

    
1029
    if not_before is not None and not_after is not None:
1030
      msg += (" (valid from %s to %s)" %
1031
              (_FormatTimestamp(not_before),
1032
               _FormatTimestamp(not_after)))
1033
    elif not_before is not None:
1034
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
1035
    elif not_after is not None:
1036
      msg += " (valid until %s)" % _FormatTimestamp(not_after)
1037

    
1038
    return (LUVerifyCluster.ETYPE_ERROR, msg)
1039

    
1040
  elif not_before is not None and not_before > now:
1041
    return (LUVerifyCluster.ETYPE_WARNING,
1042
            "Certificate %s not yet valid (valid from %s)" %
1043
            (filename, _FormatTimestamp(not_before)))
1044

    
1045
  elif not_after is not None:
1046
    remaining_days = int((not_after - now) / (24 * 3600))
1047

    
1048
    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1049

    
1050
    if remaining_days <= error_days:
1051
      return (LUVerifyCluster.ETYPE_ERROR, msg)
1052

    
1053
    if remaining_days <= warn_days:
1054
      return (LUVerifyCluster.ETYPE_WARNING, msg)
1055

    
1056
  return (None, None)
1057

    
1058

    
1059
def _VerifyCertificate(filename):
1060
  """Verifies a certificate for LUVerifyCluster.
1061

1062
  @type filename: string
1063
  @param filename: Path to PEM file
1064

1065
  """
1066
  try:
1067
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1068
                                           utils.ReadFile(filename))
1069
  except Exception, err: # pylint: disable-msg=W0703
1070
    return (LUVerifyCluster.ETYPE_ERROR,
1071
            "Failed to load X509 certificate %s: %s" % (filename, err))
1072

    
1073
  # Depending on the pyOpenSSL version, this can just return (None, None)
1074
  (not_before, not_after) = utils.GetX509CertValidity(cert)
1075

    
1076
  return _VerifyCertificateInner(filename, cert.has_expired(),
1077
                                 not_before, not_after, time.time())
1078

    
1079

    
1080
class LUVerifyCluster(LogicalUnit):
1081
  """Verifies the cluster status.
1082

1083
  """
1084
  HPATH = "cluster-verify"
1085
  HTYPE = constants.HTYPE_CLUSTER
1086
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1087
  REQ_BGL = False
1088

    
1089
  TCLUSTER = "cluster"
1090
  TNODE = "node"
1091
  TINSTANCE = "instance"
1092

    
1093
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1094
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1095
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1096
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1097
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1098
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1100
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1101
  ENODEDRBD = (TNODE, "ENODEDRBD")
1102
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1103
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1104
  ENODEHV = (TNODE, "ENODEHV")
1105
  ENODELVM = (TNODE, "ENODELVM")
1106
  ENODEN1 = (TNODE, "ENODEN1")
1107
  ENODENET = (TNODE, "ENODENET")
1108
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1109
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1110
  ENODERPC = (TNODE, "ENODERPC")
1111
  ENODESSH = (TNODE, "ENODESSH")
1112
  ENODEVERSION = (TNODE, "ENODEVERSION")
1113
  ENODESETUP = (TNODE, "ENODESETUP")
1114
  ENODETIME = (TNODE, "ENODETIME")
1115

    
1116
  ETYPE_FIELD = "code"
1117
  ETYPE_ERROR = "ERROR"
1118
  ETYPE_WARNING = "WARNING"
1119

    
1120
  class NodeImage(object):
1121
    """A class representing the logical and physical status of a node.
1122

1123
    @ivar volumes: a structure as returned from
1124
        L{ganeti.backend.GetVolumeList} (runtime)
1125
    @ivar instances: a list of running instances (runtime)
1126
    @ivar pinst: list of configured primary instances (config)
1127
    @ivar sinst: list of configured secondary instances (config)
1128
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1129
        of this node (config)
1130
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1131
    @ivar dfree: free disk, as reported by the node (runtime)
1132
    @ivar offline: the offline status (config)
1133
    @type rpc_fail: boolean
1134
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1135
        not whether the individual keys were correct) (runtime)
1136
    @type lvm_fail: boolean
1137
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1138
    @type hyp_fail: boolean
1139
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1140
    @type ghost: boolean
1141
    @ivar ghost: whether this is a known node or not (config)
1142

1143
    """
1144
    def __init__(self, offline=False):
1145
      self.volumes = {}
1146
      self.instances = []
1147
      self.pinst = []
1148
      self.sinst = []
1149
      self.sbp = {}
1150
      self.mfree = 0
1151
      self.dfree = 0
1152
      self.offline = offline
1153
      self.rpc_fail = False
1154
      self.lvm_fail = False
1155
      self.hyp_fail = False
1156
      self.ghost = False
1157

    
1158
  def ExpandNames(self):
1159
    self.needed_locks = {
1160
      locking.LEVEL_NODE: locking.ALL_SET,
1161
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1162
    }
1163
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1164

    
1165
  def _Error(self, ecode, item, msg, *args, **kwargs):
1166
    """Format an error message.
1167

1168
    Based on the opcode's error_codes parameter, either format a
1169
    parseable error code, or a simpler error string.
1170

1171
    This must be called only from Exec and functions called from Exec.
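
    For instance (illustrative values), with the error_codes opcode
    parameter set, an entry is reported as
    "ERROR:ENODELVM:node:node1.example.tld:message", while without it the
    same entry reads "ERROR: node node1.example.tld: message".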
1172

1173
    """
1174
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1175
    itype, etxt = ecode
1176
    # first complete the msg
1177
    if args:
1178
      msg = msg % args
1179
    # then format the whole message
1180
    if self.op.error_codes:
1181
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1182
    else:
1183
      if item:
1184
        item = " " + item
1185
      else:
1186
        item = ""
1187
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1188
    # and finally report it via the feedback_fn
1189
    self._feedback_fn("  - %s" % msg)
1190

    
1191
  def _ErrorIf(self, cond, *args, **kwargs):
1192
    """Log an error message if the passed condition is True.
1193

1194
    """
1195
    cond = bool(cond) or self.op.debug_simulate_errors
1196
    if cond:
1197
      self._Error(*args, **kwargs)
1198
    # do not mark the operation as failed for WARN cases only
1199
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1200
      self.bad = self.bad or cond
1201

    
1202
  def _VerifyNode(self, ninfo, nresult):
1203
    """Run multiple tests against a node.
1204

1205
    Test list:
1206

1207
      - compares ganeti version
1208
      - checks vg existence and size > 20G
1209
      - checks config file checksum
1210
      - checks ssh to other nodes
1211

1212
    @type ninfo: L{objects.Node}
1213
    @param ninfo: the node to check
1214
    @param nresult: the results from the node
1215
    @rtype: boolean
1216
    @return: whether overall this call was successful (and we can expect
1217
         reasonable values in the response)
1218

1219
    """
1220
    node = ninfo.name
1221
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1222

    
1223
    # main result, nresult should be a non-empty dict
1224
    test = not nresult or not isinstance(nresult, dict)
1225
    _ErrorIf(test, self.ENODERPC, node,
1226
                  "unable to verify node: no data returned")
1227
    if test:
1228
      return False
1229

    
1230
    # compares ganeti version
1231
    local_version = constants.PROTOCOL_VERSION
1232
    remote_version = nresult.get("version", None)
1233
    test = not (remote_version and
1234
                isinstance(remote_version, (list, tuple)) and
1235
                len(remote_version) == 2)
1236
    _ErrorIf(test, self.ENODERPC, node,
1237
             "connection to node returned invalid data")
1238
    if test:
1239
      return False
1240

    
1241
    test = local_version != remote_version[0]
1242
    _ErrorIf(test, self.ENODEVERSION, node,
1243
             "incompatible protocol versions: master %s,"
1244
             " node %s", local_version, remote_version[0])
1245
    if test:
1246
      return False
1247

    
1248
    # node seems compatible, we can actually try to look into its results
1249

    
1250
    # full package version
1251
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1252
                  self.ENODEVERSION, node,
1253
                  "software version mismatch: master %s, node %s",
1254
                  constants.RELEASE_VERSION, remote_version[1],
1255
                  code=self.ETYPE_WARNING)
1256

    
1257
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1258
    if isinstance(hyp_result, dict):
1259
      for hv_name, hv_result in hyp_result.iteritems():
1260
        test = hv_result is not None
1261
        _ErrorIf(test, self.ENODEHV, node,
1262
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1263

    
1264

    
1265
    test = nresult.get(constants.NV_NODESETUP,
1266
                           ["Missing NODESETUP results"])
1267
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1268
             "; ".join(test))
1269

    
1270
    return True
1271

    
1272
  def _VerifyNodeTime(self, ninfo, nresult,
1273
                      nvinfo_starttime, nvinfo_endtime):
1274
    """Check the node time.
1275

1276
    @type ninfo: L{objects.Node}
1277
    @param ninfo: the node to check
1278
    @param nresult: the remote results for the node
1279
    @param nvinfo_starttime: the start time of the RPC call
1280
    @param nvinfo_endtime: the end time of the RPC call
1281

1282
    """
1283
    node = ninfo.name
1284
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1285

    
1286
    ntime = nresult.get(constants.NV_TIME, None)
1287
    try:
1288
      ntime_merged = utils.MergeTime(ntime)
1289
    except (ValueError, TypeError):
1290
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1291
      return
1292

    
1293
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1294
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1295
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1296
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1297
    else:
1298
      ntime_diff = None
1299

    
1300
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1301
             "Node time diverges by at least %s from master node time",
1302
             ntime_diff)
1303

    
1304
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1305
    """Check the node time.
1306

1307
    @type ninfo: L{objects.Node}
1308
    @param ninfo: the node to check
1309
    @param nresult: the remote results for the node
1310
    @param vg_name: the configured VG name
1311

1312
    """
1313
    if vg_name is None:
1314
      return
1315

    
1316
    node = ninfo.name
1317
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1318

    
1319
    # checks vg existence and size > 20G
1320
    vglist = nresult.get(constants.NV_VGLIST, None)
1321
    test = not vglist
1322
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1323
    if not test:
1324
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1325
                                            constants.MIN_VG_SIZE)
1326
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1327

    
1328
    # check pv names
1329
    pvlist = nresult.get(constants.NV_PVLIST, None)
1330
    test = pvlist is None
1331
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1332
    if not test:
1333
      # check that ':' is not present in PV names, since it's a
1334
      # special character for lvcreate (denotes the range of PEs to
1335
      # use on the PV)
1336
      for _, pvname, owner_vg in pvlist:
1337
        test = ":" in pvname
1338
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1339
                 " '%s' of VG '%s'", pvname, owner_vg)
1340

    
1341
  def _VerifyNodeNetwork(self, ninfo, nresult):
1342
    """Check the node time.
1343

1344
    @type ninfo: L{objects.Node}
1345
    @param ninfo: the node to check
1346
    @param nresult: the remote results for the node
1347

1348
    """
1349
    node = ninfo.name
1350
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1351

    
1352
    test = constants.NV_NODELIST not in nresult
1353
    _ErrorIf(test, self.ENODESSH, node,
1354
             "node hasn't returned node ssh connectivity data")
1355
    if not test:
1356
      if nresult[constants.NV_NODELIST]:
1357
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1358
          _ErrorIf(True, self.ENODESSH, node,
1359
                   "ssh communication with node '%s': %s", a_node, a_msg)
1360

    
1361
    test = constants.NV_NODENETTEST not in nresult
1362
    _ErrorIf(test, self.ENODENET, node,
1363
             "node hasn't returned node tcp connectivity data")
1364
    if not test:
1365
      if nresult[constants.NV_NODENETTEST]:
1366
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1367
        for anode in nlist:
1368
          _ErrorIf(True, self.ENODENET, node,
1369
                   "tcp communication with node '%s': %s",
1370
                   anode, nresult[constants.NV_NODENETTEST][anode])
1371

    
1372
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1373
    """Verify an instance.
1374

1375
    This function checks to see if the required block devices are
1376
    available on the instance's node.
1377

1378
    """
1379
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1380
    node_current = instanceconfig.primary_node
1381

    
1382
    node_vol_should = {}
1383
    instanceconfig.MapLVsByNode(node_vol_should)
1384

    
1385
    for node in node_vol_should:
1386
      n_img = node_image[node]
1387
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1388
        # ignore missing volumes on offline or broken nodes
1389
        continue
1390
      for volume in node_vol_should[node]:
1391
        test = volume not in n_img.volumes
1392
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1393
                 "volume %s missing on node %s", volume, node)
1394

    
1395
    if instanceconfig.admin_up:
1396
      pri_img = node_image[node_current]
1397
      test = instance not in pri_img.instances and not pri_img.offline
1398
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1399
               "instance not running on its primary node %s",
1400
               node_current)
1401

    
1402
    for node, n_img in node_image.items():
1403
      if node != node_current:
1404
        test = instance in n_img.instances
1405
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1406
                 "instance should not run on node %s", node)
1407

    
1408
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1409
    """Verify if there are any unknown volumes in the cluster.
1410

1411
    The .os, .swap and backup volumes are ignored. All other volumes are
1412
    reported as unknown.
1413

1414
    """
1415
    for node, n_img in node_image.items():
1416
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1417
        # skip non-healthy nodes
1418
        continue
1419
      for volume in n_img.volumes:
1420
        test = (node not in node_vol_should or
1421
                volume not in node_vol_should[node])
1422
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1423
                      "volume %s is unknown", volume)
1424

    
1425
  def _VerifyOrphanInstances(self, instancelist, node_image):
1426
    """Verify the list of running instances.
1427

1428
    This checks what instances are running but unknown to the cluster.
1429

1430
    """
1431
    for node, n_img in node_image.items():
1432
      for o_inst in n_img.instances:
1433
        test = o_inst not in instancelist
1434
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1435
                      "instance %s on node %s should not exist", o_inst, node)
1436

    
1437
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1438
    """Verify N+1 Memory Resilience.
1439

1440
    Check that if one single node dies we can still start all the
1441
    instances it was primary for.
1442

1443
    """
1444
    for node, n_img in node_image.items():
1445
      # This code checks that every node which is now listed as
1446
      # secondary has enough memory to host all instances it is
1447
      # supposed to, should a single other node in the cluster fail.
1448
      # FIXME: not ready for failover to an arbitrary node
1449
      # FIXME: does not support file-backed instances
1450
      # WARNING: we currently take into account down instances as well
1451
      # as up ones, considering that even if they're down someone
1452
      # might want to start them even in the event of a node failure.
1453
      for prinode, instances in n_img.sbp.items():
1454
        needed_mem = 0
1455
        for instance in instances:
1456
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1457
          if bep[constants.BE_AUTO_BALANCE]:
1458
            needed_mem += bep[constants.BE_MEMORY]
1459
        test = n_img.mfree < needed_mem
1460
        self._ErrorIf(test, self.ENODEN1, node,
1461
                      "not enough memory on to accommodate"
1462
                      " failovers should peer node %s fail", prinode)
1463

    
1464
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1465
                       master_files):
1466
    """Verifies and computes the node required file checksums.
1467

1468
    @type ninfo: L{objects.Node}
1469
    @param ninfo: the node to check
1470
    @param nresult: the remote results for the node
1471
    @param file_list: required list of files
1472
    @param local_cksum: dictionary of local files and their checksums
1473
    @param master_files: list of files that only masters should have
1474

1475
    """
1476
    node = ninfo.name
1477
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1478

    
1479
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1480
    test = not isinstance(remote_cksum, dict)
1481
    _ErrorIf(test, self.ENODEFILECHECK, node,
1482
             "node hasn't returned file checksum data")
1483
    if test:
1484
      return
1485

    
1486
    for file_name in file_list:
1487
      node_is_mc = ninfo.master_candidate
1488
      must_have = (file_name not in master_files) or node_is_mc
1489
      # missing
1490
      test1 = file_name not in remote_cksum
1491
      # invalid checksum
1492
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1493
      # existing and good
1494
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1495
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1496
               "file '%s' missing", file_name)
1497
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1498
               "file '%s' has wrong checksum", file_name)
1499
      # not candidate and this is not a must-have file
1500
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1501
               "file '%s' should not exist on non master"
1502
               " candidates (and the file is outdated)", file_name)
1503
      # all good, except non-master/non-must have combination
1504
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1505
               "file '%s' should not exist"
1506
               " on non master candidates", file_name)
1507

    
1508
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1509
    """Verifies and the node DRBD status.
1510

1511
    @type ninfo: L{objects.Node}
1512
    @param ninfo: the node to check
1513
    @param nresult: the remote results for the node
1514
    @param instanceinfo: the dict of instances
1515
    @param drbd_map: the DRBD map as returned by
1516
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1517

1518
    """
1519
    node = ninfo.name
1520
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1521

    
1522
    # compute the DRBD minors
1523
    node_drbd = {}
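    # node_drbd: minor -> (instance name, True if the minor should be active,
    # i.e. the owning instance is known and marked admin_up)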
1524
    for minor, instance in drbd_map[node].items():
1525
      test = instance not in instanceinfo
1526
      _ErrorIf(test, self.ECLUSTERCFG, None,
1527
               "ghost instance '%s' in temporary DRBD map", instance)
1528
        # ghost instance should not be running, but otherwise we
1529
        # don't give double warnings (both ghost instance and
1530
        # unallocated minor in use)
1531
      if test:
1532
        node_drbd[minor] = (instance, False)
1533
      else:
1534
        instance = instanceinfo[instance]
1535
        node_drbd[minor] = (instance.name, instance.admin_up)
1536

    
1537
    # and now check them
1538
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1539
    test = not isinstance(used_minors, (tuple, list))
1540
    _ErrorIf(test, self.ENODEDRBD, node,
1541
             "cannot parse drbd status file: %s", str(used_minors))
1542
    if test:
1543
      # we cannot check drbd status
1544
      return
1545

    
1546
    for minor, (iname, must_exist) in node_drbd.items():
1547
      test = minor not in used_minors and must_exist
1548
      _ErrorIf(test, self.ENODEDRBD, node,
1549
               "drbd minor %d of instance %s is not active", minor, iname)
1550
    for minor in used_minors:
1551
      test = minor not in node_drbd
1552
      _ErrorIf(test, self.ENODEDRBD, node,
1553
               "unallocated drbd minor %d is in use", minor)
1554

    
1555
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1556
    """Verifies and updates the node volume data.
1557

1558
    This function will update a L{NodeImage}'s internal structures
1559
    with data from the remote call.
1560

1561
    @type ninfo: L{objects.Node}
1562
    @param ninfo: the node to check
1563
    @param nresult: the remote results for the node
1564
    @param nimg: the node image object
1565
    @param vg_name: the configured VG name
1566

1567
    """
1568
    node = ninfo.name
1569
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1570

    
1571
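    # assume LVM failure until the LV data below has been parsed successfully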
    nimg.lvm_fail = True
1572
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1573
    if vg_name is None:
1574
      pass
1575
    elif isinstance(lvdata, basestring):
1576
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1577
               utils.SafeEncode(lvdata))
1578
    elif not isinstance(lvdata, dict):
1579
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1580
    else:
1581
      nimg.volumes = lvdata
1582
      nimg.lvm_fail = False
1583

    
1584
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1585
    """Verifies and updates the node instance list.
1586

1587
    If the listing was successful, then updates this node's instance
1588
    list. Otherwise, it marks the RPC call as failed for the instance
1589
    list key.
1590

1591
    @type ninfo: L{objects.Node}
1592
    @param ninfo: the node to check
1593
    @param nresult: the remote results for the node
1594
    @param nimg: the node image object
1595

1596
    """
1597
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1598
    test = not isinstance(idata, list)
1599
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1600
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1601
    if test:
1602
      nimg.hyp_fail = True
1603
    else:
1604
      nimg.instances = idata
1605

    
1606
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1607
    """Verifies and computes a node information map
1608

1609
    @type ninfo: L{objects.Node}
1610
    @param ninfo: the node to check
1611
    @param nresult: the remote results for the node
1612
    @param nimg: the node image object
1613
    @param vg_name: the configured VG name
1614

1615
    """
1616
    node = ninfo.name
1617
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1618

    
1619
    # try to read free memory (from the hypervisor)
1620
    hv_info = nresult.get(constants.NV_HVINFO, None)
1621
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1622
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1623
    if not test:
1624
      try:
1625
        nimg.mfree = int(hv_info["memory_free"])
1626
      except (ValueError, TypeError):
1627
        _ErrorIf(True, self.ENODERPC, node,
1628
                 "node returned invalid nodeinfo, check hypervisor")
1629

    
1630
    # FIXME: devise a free space model for file based instances as well
1631
    if vg_name is not None:
1632
      test = (constants.NV_VGLIST not in nresult or
1633
              vg_name not in nresult[constants.NV_VGLIST])
1634
      _ErrorIf(test, self.ENODELVM, node,
1635
               "node didn't return data for the volume group '%s'"
1636
               " - it is either missing or broken", vg_name)
1637
      if not test:
1638
        try:
1639
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1640
        except (ValueError, TypeError):
1641
          _ErrorIf(True, self.ENODERPC, node,
1642
                   "node returned invalid LVM info, check LVM status")
1643

    
1644
  def CheckPrereq(self):
1645
    """Check prerequisites.
1646

1647
    Transform the list of checks we're going to skip into a set and check that
1648
    all its members are valid.
1649

1650
    """
1651
    self.skip_set = frozenset(self.op.skip_checks)
1652
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1653
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
1654
                                 errors.ECODE_INVAL)
1655

    
1656
  def BuildHooksEnv(self):
1657
    """Build hooks env.
1658

1659
    Cluster-Verify hooks run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.
1661

1662
    """
1663
    all_nodes = self.cfg.GetNodeList()
1664
    env = {
1665
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1666
      }
1667
    for node in self.cfg.GetAllNodesInfo().values():
1668
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1669

    
1670
    return env, [], all_nodes
1671

    
1672
  def Exec(self, feedback_fn):
1673
    """Verify integrity of cluster, performing various test on nodes.
1674

1675
    """
1676
    self.bad = False
1677
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1678
    verbose = self.op.verbose
1679
    self._feedback_fn = feedback_fn
1680
    feedback_fn("* Verifying global settings")
1681
    for msg in self.cfg.VerifyConfig():
1682
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1683

    
1684
    # Check the cluster certificates
1685
    for cert_filename in constants.ALL_CERT_FILES:
1686
      (errcode, msg) = _VerifyCertificate(cert_filename)
1687
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1688

    
1689
    vg_name = self.cfg.GetVGName()
1690
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1691
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1692
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1693
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1694
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1695
                        for iname in instancelist)
1696
    i_non_redundant = [] # Non redundant instances
1697
    i_non_a_balanced = [] # Non auto-balanced instances
1698
    n_offline = 0 # Count of offline nodes
1699
    n_drained = 0 # Count of nodes being drained
1700
    node_vol_should = {}
1701

    
1702
    # FIXME: verify OS list
1703
    # do local checksums
1704
    master_files = [constants.CLUSTER_CONF_FILE]
1705

    
1706
    file_names = ssconf.SimpleStore().GetFileList()
1707
    file_names.extend(constants.ALL_CERT_FILES)
1708
    file_names.extend(master_files)
1709

    
1710
    local_checksums = utils.FingerprintFiles(file_names)
1711

    
1712
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1713
    node_verify_param = {
1714
      constants.NV_FILELIST: file_names,
1715
      constants.NV_NODELIST: [node.name for node in nodeinfo
1716
                              if not node.offline],
1717
      constants.NV_HYPERVISOR: hypervisors,
1718
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1719
                                  node.secondary_ip) for node in nodeinfo
1720
                                 if not node.offline],
1721
      constants.NV_INSTANCELIST: hypervisors,
1722
      constants.NV_VERSION: None,
1723
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1724
      constants.NV_NODESETUP: None,
1725
      constants.NV_TIME: None,
1726
      }
1727

    
1728
    if vg_name is not None:
1729
      node_verify_param[constants.NV_VGLIST] = None
1730
      node_verify_param[constants.NV_LVLIST] = vg_name
1731
      node_verify_param[constants.NV_PVLIST] = [vg_name]
1732
      node_verify_param[constants.NV_DRBDLIST] = None
1733

    
1734
    # Build our expected cluster state
1735
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
1736
                      for node in nodeinfo)
1737

    
1738
    for instance in instancelist:
1739
      inst_config = instanceinfo[instance]
1740

    
1741
      for nname in inst_config.all_nodes:
1742
        if nname not in node_image:
1743
          # ghost node
1744
          gnode = self.NodeImage()
1745
          gnode.ghost = True
1746
          node_image[nname] = gnode
1747

    
1748
      inst_config.MapLVsByNode(node_vol_should)
1749

    
1750
      pnode = inst_config.primary_node
1751
      node_image[pnode].pinst.append(instance)
1752

    
1753
      for snode in inst_config.secondary_nodes:
1754
        nimg = node_image[snode]
1755
        nimg.sinst.append(instance)
1756
        if pnode not in nimg.sbp:
1757
          nimg.sbp[pnode] = []
1758
        nimg.sbp[pnode].append(instance)
1759

    
1760
    # At this point, we have the in-memory data structures complete,
1761
    # except for the runtime information, which we'll gather next
1762

    
1763
    # Due to the way our RPC system works, exact response times cannot be
1764
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1765
    # time before and after executing the request, we can at least have a time
1766
    # window.
1767
    nvinfo_starttime = time.time()
1768
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1769
                                           self.cfg.GetClusterName())
1770
    nvinfo_endtime = time.time()
1771

    
1772
    cluster = self.cfg.GetClusterInfo()
1773
    master_node = self.cfg.GetMasterNode()
1774
    all_drbd_map = self.cfg.ComputeDRBDMap()
1775

    
1776
    feedback_fn("* Verifying node status")
1777
    for node_i in nodeinfo:
1778
      node = node_i.name
1779
      nimg = node_image[node]
1780

    
1781
      if node_i.offline:
1782
        if verbose:
1783
          feedback_fn("* Skipping offline node %s" % (node,))
1784
        n_offline += 1
1785
        continue
1786

    
1787
      if node == master_node:
1788
        ntype = "master"
1789
      elif node_i.master_candidate:
1790
        ntype = "master candidate"
1791
      elif node_i.drained:
1792
        ntype = "drained"
1793
        n_drained += 1
1794
      else:
1795
        ntype = "regular"
1796
      if verbose:
1797
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1798

    
1799
      msg = all_nvinfo[node].fail_msg
1800
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1801
      if msg:
1802
        nimg.rpc_fail = True
1803
        continue
1804

    
1805
      nresult = all_nvinfo[node].payload
1806

    
1807
      nimg.call_ok = self._VerifyNode(node_i, nresult)
1808
      self._VerifyNodeNetwork(node_i, nresult)
1809
      self._VerifyNodeLVM(node_i, nresult, vg_name)
1810
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1811
                            master_files)
1812
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1813
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1814

    
1815
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1816
      self._UpdateNodeInstances(node_i, nresult, nimg)
1817
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1818

    
1819
    feedback_fn("* Verifying instance status")
1820
    for instance in instancelist:
1821
      if verbose:
1822
        feedback_fn("* Verifying instance %s" % instance)
1823
      inst_config = instanceinfo[instance]
1824
      self._VerifyInstance(instance, inst_config, node_image)
1825
      inst_nodes_offline = []
1826

    
1827
      pnode = inst_config.primary_node
1828
      pnode_img = node_image[pnode]
1829
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1830
               self.ENODERPC, pnode, "instance %s, connection to"
1831
               " primary node failed", instance)
1832

    
1833
      if pnode_img.offline:
1834
        inst_nodes_offline.append(pnode)
1835

    
1836
      # If the instance is non-redundant we cannot survive losing its primary
1837
      # node, so we are not N+1 compliant. On the other hand we have no disk
1838
      # templates with more than one secondary so that situation is not well
1839
      # supported either.
1840
      # FIXME: does not support file-backed instances
1841
      if not inst_config.secondary_nodes:
1842
        i_non_redundant.append(instance)
1843
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1844
               instance, "instance has multiple secondary nodes: %s",
1845
               utils.CommaJoin(inst_config.secondary_nodes),
1846
               code=self.ETYPE_WARNING)
1847

    
1848
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1849
        i_non_a_balanced.append(instance)
1850

    
1851
      for snode in inst_config.secondary_nodes:
1852
        s_img = node_image[snode]
1853
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1854
                 "instance %s, connection to secondary node failed", instance)
1855

    
1856
        if s_img.offline:
1857
          inst_nodes_offline.append(snode)
1858

    
1859
      # warn that the instance lives on offline nodes
1860
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1861
               "instance lives on offline node(s) %s",
1862
               utils.CommaJoin(inst_nodes_offline))
1863
      # ... or ghost nodes
1864
      for node in inst_config.all_nodes:
1865
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1866
                 "instance lives on ghost node %s", node)
1867

    
1868
    feedback_fn("* Verifying orphan volumes")
1869
    self._VerifyOrphanVolumes(node_vol_should, node_image)
1870

    
1871
    feedback_fn("* Verifying oprhan instances")
1872
    self._VerifyOrphanInstances(instancelist, node_image)
1873

    
1874
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1875
      feedback_fn("* Verifying N+1 Memory redundancy")
1876
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
1877

    
1878
    feedback_fn("* Other Notes")
1879
    if i_non_redundant:
1880
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1881
                  % len(i_non_redundant))
1882

    
1883
    if i_non_a_balanced:
1884
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1885
                  % len(i_non_a_balanced))
1886

    
1887
    if n_offline:
1888
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
1889

    
1890
    if n_drained:
1891
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
1892

    
1893
    return not self.bad
1894

    
1895
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1896
    """Analyze the post-hooks' result
1897

1898
    This method analyses the hook result, handles it, and sends some
1899
    nicely-formatted feedback back to the user.
1900

1901
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1902
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1903
    @param hooks_results: the results of the multi-node hooks rpc call
1904
    @param feedback_fn: function used to send feedback back to the caller
1905
    @param lu_result: previous Exec result
1906
    @return: the new Exec result, based on the previous result
1907
        and hook results
1908

1909
    """
1910
    # We only really run POST phase hooks, and are only interested in
1911
    # their results
1912
    if phase == constants.HOOKS_PHASE_POST:
1913
      # Used to change hooks' output to proper indentation
1914
      indent_re = re.compile('^', re.M)
1915
      feedback_fn("* Hooks Results")
1916
      assert hooks_results, "invalid result from hooks"
1917

    
1918
      for node_name in hooks_results:
1919
        res = hooks_results[node_name]
1920
        msg = res.fail_msg
1921
        test = msg and not res.offline
1922
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
1923
                      "Communication failure in hooks execution: %s", msg)
1924
        if res.offline or msg:
1925
          # No need to investigate payload if node is offline or gave an error.
1926
          # override manually lu_result here as _ErrorIf only
1927
          # overrides self.bad
1928
          lu_result = 1
1929
          continue
1930
        for script, hkr, output in res.payload:
1931
          test = hkr == constants.HKR_FAIL
1932
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
1933
                        "Script %s failed, output:", script)
1934
          if test:
1935
            output = indent_re.sub('      ', output)
1936
            feedback_fn("%s" % output)
1937
            lu_result = 0
1938

    
1939
      return lu_result
1940

    
1941

    
1942
class LUVerifyDisks(NoHooksLU):
1943
  """Verifies the cluster disks status.
1944

1945
  """
1946
  _OP_REQP = []
1947
  REQ_BGL = False
1948

    
1949
  def ExpandNames(self):
1950
    self.needed_locks = {
1951
      locking.LEVEL_NODE: locking.ALL_SET,
1952
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1953
    }
1954
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1955

    
1956
  def CheckPrereq(self):
1957
    """Check prerequisites.
1958

1959
    This has no prerequisites.
1960

1961
    """
1962
    pass
1963

    
1964
  def Exec(self, feedback_fn):
1965
    """Verify integrity of cluster disks.
1966

1967
    @rtype: tuple of three items
1968
    @return: a tuple of (dict of node-to-node_error, list of instances
1969
        which need activate-disks, dict of instance: (node, volume) for
1970
        missing volumes)
1971

1972
    """
1973
    result = res_nodes, res_instances, res_missing = {}, [], {}
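    # "result" is bound to the same (dict, list, dict) objects that are
    # unpacked into res_nodes/res_instances/res_missing, so filling those
    # below also fills the value returned at the end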
1974

    
1975
    vg_name = self.cfg.GetVGName()
1976
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1977
    instances = [self.cfg.GetInstanceInfo(name)
1978
                 for name in self.cfg.GetInstanceList()]
1979

    
1980
    nv_dict = {}
1981
    for inst in instances:
1982
      inst_lvs = {}
1983
      if (not inst.admin_up or
1984
          inst.disk_template not in constants.DTS_NET_MIRROR):
1985
        continue
1986
      inst.MapLVsByNode(inst_lvs)
1987
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1988
      for node, vol_list in inst_lvs.iteritems():
1989
        for vol in vol_list:
1990
          nv_dict[(node, vol)] = inst
1991

    
1992
    if not nv_dict:
1993
      return result
1994

    
1995
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1996

    
1997
    for node in nodes:
1998
      # node_volume
1999
      node_res = node_lvs[node]
2000
      if node_res.offline:
2001
        continue
2002
      msg = node_res.fail_msg
2003
      if msg:
2004
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2005
        res_nodes[node] = msg
2006
        continue
2007

    
2008
      lvs = node_res.payload
2009
      for lv_name, (_, _, lv_online) in lvs.items():
2010
        inst = nv_dict.pop((node, lv_name), None)
2011
        if (not lv_online and inst is not None
2012
            and inst.name not in res_instances):
2013
          res_instances.append(inst.name)
2014

    
2015
    # any leftover items in nv_dict are missing LVs, let's arrange the
2016
    # data better
2017
    for key, inst in nv_dict.iteritems():
2018
      if inst.name not in res_missing:
2019
        res_missing[inst.name] = []
2020
      res_missing[inst.name].append(key)
2021

    
2022
    return result
2023

    
2024

    
2025
class LURepairDiskSizes(NoHooksLU):
2026
  """Verifies the cluster disks sizes.
2027

2028
  """
2029
  _OP_REQP = ["instances"]
2030
  REQ_BGL = False
2031

    
2032
  def ExpandNames(self):
2033
    if not isinstance(self.op.instances, list):
2034
      raise errors.OpPrereqError("Invalid argument type 'instances'",
2035
                                 errors.ECODE_INVAL)
2036

    
2037
    if self.op.instances:
2038
      self.wanted_names = []
2039
      for name in self.op.instances:
2040
        full_name = _ExpandInstanceName(self.cfg, name)
2041
        self.wanted_names.append(full_name)
2042
      self.needed_locks = {
2043
        locking.LEVEL_NODE: [],
2044
        locking.LEVEL_INSTANCE: self.wanted_names,
2045
        }
2046
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2047
    else:
2048
      self.wanted_names = None
2049
      self.needed_locks = {
2050
        locking.LEVEL_NODE: locking.ALL_SET,
2051
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2052
        }
2053
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2054

    
2055
  def DeclareLocks(self, level):
2056
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2057
      self._LockInstancesNodes(primary_only=True)
2058

    
2059
  def CheckPrereq(self):
2060
    """Check prerequisites.
2061

2062
    This only checks the optional instance list against the existing names.
2063

2064
    """
2065
    if self.wanted_names is None:
2066
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2067

    
2068
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2069
                             in self.wanted_names]
2070

    
2071
  def _EnsureChildSizes(self, disk):
2072
    """Ensure children of the disk have the needed disk size.
2073

2074
    This is valid mainly for DRBD8 and fixes an issue where the
2075
    children have smaller disk size.
2076

2077
    @param disk: an L{ganeti.objects.Disk} object
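    @rtype: boolean
    @return: True if a child disk size was adjusted, False otherwise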
2078

2079
    """
2080
    if disk.dev_type == constants.LD_DRBD8:
2081
      assert disk.children, "Empty children for DRBD8?"
2082
      fchild = disk.children[0]
2083
      mismatch = fchild.size < disk.size
2084
      if mismatch:
2085
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2086
                     fchild.size, disk.size)
2087
        fchild.size = disk.size
2088

    
2089
      # and we recurse on this child only, not on the metadev
2090
      return self._EnsureChildSizes(fchild) or mismatch
2091
    else:
2092
      return False
2093

    
2094
  def Exec(self, feedback_fn):
2095
    """Verify the size of cluster disks.
2096

2097
    """
2098
    # TODO: check child disks too
2099
    # TODO: check differences in size between primary/secondary nodes
2100
    per_node_disks = {}
2101
    for instance in self.wanted_instances:
2102
      pnode = instance.primary_node
2103
      if pnode not in per_node_disks:
2104
        per_node_disks[pnode] = []
2105
      for idx, disk in enumerate(instance.disks):
2106
        per_node_disks[pnode].append((instance, idx, disk))
2107

    
2108
    changed = []
2109
    for node, dskl in per_node_disks.items():
2110
      newl = [v[2].Copy() for v in dskl]
2111
      for dsk in newl:
2112
        self.cfg.SetDiskID(dsk, node)
2113
      result = self.rpc.call_blockdev_getsizes(node, newl)
2114
      if result.fail_msg:
2115
        self.LogWarning("Failure in blockdev_getsizes call to node"
2116
                        " %s, ignoring", node)
2117
        continue
2118
      if len(result.data) != len(dskl):
2119
        self.LogWarning("Invalid result from node %s, ignoring node results",
2120
                        node)
2121
        continue
2122
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2123
        if size is None:
2124
          self.LogWarning("Disk %d of instance %s did not return size"
2125
                          " information, ignoring", idx, instance.name)
2126
          continue
2127
        if not isinstance(size, (int, long)):
2128
          self.LogWarning("Disk %d of instance %s did not return valid"
2129
                          " size information, ignoring", idx, instance.name)
2130
          continue
2131
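        # the node presumably reports the size in bytes; the shift converts
        # it to MiB, the unit used for disk.size in the configuration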
        size = size >> 20
2132
        if size != disk.size:
2133
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2134
                       " correcting: recorded %d, actual %d", idx,
2135
                       instance.name, disk.size, size)
2136
          disk.size = size
2137
          self.cfg.Update(instance, feedback_fn)
2138
          changed.append((instance.name, idx, size))
2139
        if self._EnsureChildSizes(disk):
2140
          self.cfg.Update(instance, feedback_fn)
2141
          changed.append((instance.name, idx, disk.size))
2142
    return changed
2143

    
2144

    
2145
class LURenameCluster(LogicalUnit):
2146
  """Rename the cluster.
2147

2148
  """
2149
  HPATH = "cluster-rename"
2150
  HTYPE = constants.HTYPE_CLUSTER
2151
  _OP_REQP = ["name"]
2152

    
2153
  def BuildHooksEnv(self):
2154
    """Build hooks env.
2155

2156
    """
2157
    env = {
2158
      "OP_TARGET": self.cfg.GetClusterName(),
2159
      "NEW_NAME": self.op.name,
2160
      }
2161
    mn = self.cfg.GetMasterNode()
2162
    all_nodes = self.cfg.GetNodeList()
2163
    return env, [mn], all_nodes
2164

    
2165
  def CheckPrereq(self):
2166
    """Verify that the passed name is a valid one.
2167

2168
    """
2169
    hostname = utils.GetHostInfo(self.op.name)
2170

    
2171
    new_name = hostname.name
2172
    self.ip = new_ip = hostname.ip
2173
    old_name = self.cfg.GetClusterName()
2174
    old_ip = self.cfg.GetMasterIP()
2175
    if new_name == old_name and new_ip == old_ip:
2176
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2177
                                 " cluster has changed",
2178
                                 errors.ECODE_INVAL)
2179
    if new_ip != old_ip:
2180
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2181
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2182
                                   " reachable on the network. Aborting." %
2183
                                   new_ip, errors.ECODE_NOTUNIQUE)
2184

    
2185
    self.op.name = new_name
2186

    
2187
  def Exec(self, feedback_fn):
2188
    """Rename the cluster.
2189

2190
    """
2191
    clustername = self.op.name
2192
    ip = self.ip
2193

    
2194
    # shutdown the master IP
2195
    master = self.cfg.GetMasterNode()
2196
    result = self.rpc.call_node_stop_master(master, False)
2197
    result.Raise("Could not disable the master role")
2198

    
2199
    try:
2200
      cluster = self.cfg.GetClusterInfo()
2201
      cluster.cluster_name = clustername
2202
      cluster.master_ip = ip
2203
      self.cfg.Update(cluster, feedback_fn)
2204

    
2205
      # update the known hosts file
2206
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2207
      node_list = self.cfg.GetNodeList()
2208
      try:
2209
        node_list.remove(master)
2210
      except ValueError:
2211
        pass
2212
      result = self.rpc.call_upload_file(node_list,
2213
                                         constants.SSH_KNOWN_HOSTS_FILE)
2214
      for to_node, to_result in result.iteritems():
2215
        msg = to_result.fail_msg
2216
        if msg:
2217
          msg = ("Copy of file %s to node %s failed: %s" %
2218
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2219
          self.proc.LogWarning(msg)
2220

    
2221
    finally:
2222
      result = self.rpc.call_node_start_master(master, False, False)
2223
      msg = result.fail_msg
2224
      if msg:
2225
        self.LogWarning("Could not re-enable the master role on"
2226
                        " the master, please restart manually: %s", msg)
2227

    
2228

    
2229
def _RecursiveCheckIfLVMBased(disk):
2230
  """Check if the given disk or its children are lvm-based.
2231

2232
  @type disk: L{objects.Disk}
2233
  @param disk: the disk to check
2234
  @rtype: boolean
2235
  @return: boolean indicating whether a LD_LV dev_type was found or not
2236

2237
  """
2238
  if disk.children:
2239
    for chdisk in disk.children:
2240
      if _RecursiveCheckIfLVMBased(chdisk):
2241
        return True
2242
  return disk.dev_type == constants.LD_LV
2243

    
2244

    
2245
class LUSetClusterParams(LogicalUnit):
2246
  """Change the parameters of the cluster.
2247

2248
  """
2249
  HPATH = "cluster-modify"
2250
  HTYPE = constants.HTYPE_CLUSTER
2251
  _OP_REQP = []
2252
  REQ_BGL = False
2253

    
2254
  def CheckArguments(self):
2255
    """Check parameters
2256

2257
    """
2258
    if not hasattr(self.op, "candidate_pool_size"):
2259
      self.op.candidate_pool_size = None
2260
    if self.op.candidate_pool_size is not None:
2261
      try:
2262
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2263
      except (ValueError, TypeError), err:
2264
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2265
                                   str(err), errors.ECODE_INVAL)
2266
      if self.op.candidate_pool_size < 1:
2267
        raise errors.OpPrereqError("At least one master candidate needed",
2268
                                   errors.ECODE_INVAL)
2269

    
2270
    _CheckBooleanOpField(self.op, "maintain_node_health")
2271

    
2272
    if self.op.uid_pool:
2273
      uidpool.CheckUidPool(self.op.uid_pool)
2274

    
2275
  def ExpandNames(self):
2276
    # FIXME: in the future maybe other cluster params won't require checking on
2277
    # all nodes to be modified.
2278
    self.needed_locks = {
2279
      locking.LEVEL_NODE: locking.ALL_SET,
2280
    }
2281
    self.share_locks[locking.LEVEL_NODE] = 1
2282

    
2283
  def BuildHooksEnv(self):
2284
    """Build hooks env.
2285

2286
    """
2287
    env = {
2288
      "OP_TARGET": self.cfg.GetClusterName(),
2289
      "NEW_VG_NAME": self.op.vg_name,
2290
      }
2291
    mn = self.cfg.GetMasterNode()
2292
    return env, [mn], [mn]
2293

    
2294
  def CheckPrereq(self):
2295
    """Check prerequisites.
2296

2297
    This checks whether the given params don't conflict and
2298
    if the given volume group is valid.
2299

2300
    """
2301
    if self.op.vg_name is not None and not self.op.vg_name:
2302
      instances = self.cfg.GetAllInstancesInfo().values()
2303
      for inst in instances:
2304
        for disk in inst.disks:
2305
          if _RecursiveCheckIfLVMBased(disk):
2306
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2307
                                       " lvm-based instances exist",
2308
                                       errors.ECODE_INVAL)
2309

    
2310
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2311

    
2312
    # if vg_name not None, checks given volume group on all nodes
2313
    if self.op.vg_name:
2314
      vglist = self.rpc.call_vg_list(node_list)
2315
      for node in node_list:
2316
        msg = vglist[node].fail_msg
2317
        if msg:
2318
          # ignoring down node
2319
          self.LogWarning("Error while gathering data on node %s"
2320
                          " (ignoring node): %s", node, msg)
2321
          continue
2322
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2323
                                              self.op.vg_name,
2324
                                              constants.MIN_VG_SIZE)
2325
        if vgstatus:
2326
          raise errors.OpPrereqError("Error on node '%s': %s" %
2327
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2328

    
2329
    self.cluster = cluster = self.cfg.GetClusterInfo()
2330
    # validate params changes
2331
    if self.op.beparams:
2332
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2333
      self.new_beparams = objects.FillDict(
2334
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2335

    
2336
    if self.op.nicparams:
2337
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2338
      self.new_nicparams = objects.FillDict(
2339
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2340
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2341
      nic_errors = []
2342

    
2343
      # check all instances for consistency
2344
      for instance in self.cfg.GetAllInstancesInfo().values():
2345
        for nic_idx, nic in enumerate(instance.nics):
2346
          params_copy = copy.deepcopy(nic.nicparams)
2347
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2348

    
2349
          # check parameter syntax
2350
          try:
2351
            objects.NIC.CheckParameterSyntax(params_filled)
2352
          except errors.ConfigurationError, err:
2353
            nic_errors.append("Instance %s, nic/%d: %s" %
2354
                              (instance.name, nic_idx, err))
2355

    
2356
          # if we're moving instances to routed, check that they have an ip
2357
          target_mode = params_filled[constants.NIC_MODE]
2358
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2359
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2360
                              (instance.name, nic_idx))
2361
      if nic_errors:
2362
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2363
                                   "\n".join(nic_errors))
2364

    
2365
    # hypervisor list/parameters
2366
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2367
    if self.op.hvparams:
2368
      if not isinstance(self.op.hvparams, dict):
2369
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2370
                                   errors.ECODE_INVAL)
2371
      for hv_name, hv_dict in self.op.hvparams.items():
2372
        if hv_name not in self.new_hvparams:
2373
          self.new_hvparams[hv_name] = hv_dict
2374
        else:
2375
          self.new_hvparams[hv_name].update(hv_dict)
2376

    
2377
    # os hypervisor parameters
2378
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2379
    if self.op.os_hvp:
2380
      if not isinstance(self.op.os_hvp, dict):
2381
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2382
                                   errors.ECODE_INVAL)
2383
      for os_name, hvs in self.op.os_hvp.items():
2384
        if not isinstance(hvs, dict):
2385
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2386
                                      " input"), errors.ECODE_INVAL)
2387
        if os_name not in self.new_os_hvp:
2388
          self.new_os_hvp[os_name] = hvs
2389
        else:
2390
          for hv_name, hv_dict in hvs.items():
2391
            if hv_name not in self.new_os_hvp[os_name]:
2392
              self.new_os_hvp[os_name][hv_name] = hv_dict
2393
            else:
2394
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2395

    
2396
    if self.op.enabled_hypervisors is not None:
2397
      self.hv_list = self.op.enabled_hypervisors
2398
      if not self.hv_list:
2399
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2400
                                   " least one member",
2401
                                   errors.ECODE_INVAL)
2402
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2403
      if invalid_hvs:
2404
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2405
                                   " entries: %s" %
2406
                                   utils.CommaJoin(invalid_hvs),
2407
                                   errors.ECODE_INVAL)
2408
    else:
2409
      self.hv_list = cluster.enabled_hypervisors
2410

    
2411
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2412
      # either the enabled list has changed, or the parameters have, validate
2413
      for hv_name, hv_params in self.new_hvparams.items():
2414
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2415
            (self.op.enabled_hypervisors and
2416
             hv_name in self.op.enabled_hypervisors)):
2417
          # either this is a new hypervisor, or its parameters have changed
2418
          hv_class = hypervisor.GetHypervisor(hv_name)
2419
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2420
          hv_class.CheckParameterSyntax(hv_params)
2421
          _CheckHVParams(self, node_list, hv_name, hv_params)
2422

    
2423
    if self.op.os_hvp:
2424
      # no need to check any newly-enabled hypervisors, since the
2425
      # defaults have already been checked in the above code-block
2426
      for os_name, os_hvp in self.new_os_hvp.items():
2427
        for hv_name, hv_params in os_hvp.items():
2428
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2429
          # we need to fill in the new os_hvp on top of the actual hv_p
2430
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2431
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2432
          hv_class = hypervisor.GetHypervisor(hv_name)
2433
          hv_class.CheckParameterSyntax(new_osp)
2434
          _CheckHVParams(self, node_list, hv_name, new_osp)
2435

    
2436

    
2437
  def Exec(self, feedback_fn):
2438
    """Change the parameters of the cluster.
2439

2440
    """
2441
    if self.op.vg_name is not None:
2442
      new_volume = self.op.vg_name
2443
      if not new_volume:
2444
        new_volume = None
2445
      if new_volume != self.cfg.GetVGName():
2446
        self.cfg.SetVGName(new_volume)
2447
      else:
2448
        feedback_fn("Cluster LVM configuration already in desired"
2449
                    " state, not changing")
2450
    if self.op.hvparams:
2451
      self.cluster.hvparams = self.new_hvparams
2452
    if self.op.os_hvp:
2453
      self.cluster.os_hvp = self.new_os_hvp
2454
    if self.op.enabled_hypervisors is not None:
2455
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2456
    if self.op.beparams:
2457
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2458
    if self.op.nicparams:
2459
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2460

    
2461
    if self.op.candidate_pool_size is not None:
2462
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2463
      # we need to update the pool size here, otherwise the save will fail
2464
      _AdjustCandidatePool(self, [])
2465

    
2466
    if self.op.maintain_node_health is not None:
2467
      self.cluster.maintain_node_health = self.op.maintain_node_health
2468

    
2469
    if self.op.uid_pool is not None:
2470
      self.cluster.uid_pool = self.op.uid_pool
2471

    
2472
    self.cfg.Update(self.cluster, feedback_fn)
2473

    
2474

    
2475
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2476
  """Distribute additional files which are part of the cluster configuration.
2477

2478
  ConfigWriter takes care of distributing the config and ssconf files, but
2479
  there are more files which should be distributed to all nodes. This function
2480
  makes sure those are copied.
2481

2482
  @param lu: calling logical unit
2483
  @param additional_nodes: list of nodes not in the config to distribute to
2484

2485
  """
2486
  # 1. Gather target nodes
2487
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2488
  dist_nodes = lu.cfg.GetOnlineNodeList()
2489
  if additional_nodes is not None:
2490
    dist_nodes.extend(additional_nodes)
2491
  if myself.name in dist_nodes:
2492
    dist_nodes.remove(myself.name)
2493

    
2494
  # 2. Gather files to distribute
2495
  dist_files = set([constants.ETC_HOSTS,
2496
                    constants.SSH_KNOWN_HOSTS_FILE,
2497
                    constants.RAPI_CERT_FILE,
2498
                    constants.RAPI_USERS_FILE,
2499
                    constants.CONFD_HMAC_KEY,
2500
                   ])
2501

    
2502
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2503
  for hv_name in enabled_hypervisors:
2504
    hv_class = hypervisor.GetHypervisor(hv_name)
2505
    dist_files.update(hv_class.GetAncillaryFiles())
2506

    
2507
  # 3. Perform the files upload
2508
  for fname in dist_files:
2509
    if os.path.exists(fname):
2510
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2511
      for to_node, to_result in result.items():
2512
        msg = to_result.fail_msg
2513
        if msg:
2514
          msg = ("Copy of file %s to node %s failed: %s" %
2515
                 (fname, to_node, msg))
2516
          lu.proc.LogWarning(msg)
2517

    
2518

    
2519
class LURedistributeConfig(NoHooksLU):
2520
  """Force the redistribution of cluster configuration.
2521

2522
  This is a very simple LU.
2523

2524
  """
2525
  _OP_REQP = []
2526
  REQ_BGL = False
2527

    
2528
  def ExpandNames(self):
2529
    self.needed_locks = {
2530
      locking.LEVEL_NODE: locking.ALL_SET,
2531
    }
2532
    self.share_locks[locking.LEVEL_NODE] = 1
2533

    
2534
  def CheckPrereq(self):
2535
    """Check prerequisites.
2536

2537
    """
2538

    
2539
  def Exec(self, feedback_fn):
2540
    """Redistribute the configuration.
2541

2542
    """
2543
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2544
    _RedistributeAncillaryFiles(self)
2545

    
2546

    
2547
def _WaitForSync(lu, instance, oneshot=False):
2548
  """Sleep and poll for an instance's disk to sync.
2549

2550
  """
2551
  if not instance.disks:
2552
    return True
2553

    
2554
  if not oneshot:
2555
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2556

    
2557
  node = instance.primary_node
2558

    
2559
  for dev in instance.disks:
2560
    lu.cfg.SetDiskID(dev, node)
2561

    
2562
  # TODO: Convert to utils.Retry
2563

    
2564
  retries = 0
2565
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2566
  while True:
2567
    max_time = 0
2568
    done = True
2569
    cumul_degraded = False
2570
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2571
    msg = rstats.fail_msg
2572
    if msg:
2573
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2574
      retries += 1
2575
      if retries >= 10:
2576
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2577
                                 " aborting." % node)
2578
      time.sleep(6)
2579
      continue
2580
    rstats = rstats.payload
2581
    retries = 0
2582
    for i, mstat in enumerate(rstats):
2583
      if mstat is None:
2584
        lu.LogWarning("Can't compute data for node %s/%s",
2585
                           node, instance.disks[i].iv_name)
2586
        continue
2587

    
2588
      cumul_degraded = (cumul_degraded or
2589
                        (mstat.is_degraded and mstat.sync_percent is None))
2590
      if mstat.sync_percent is not None:
2591
        done = False
2592
        if mstat.estimated_time is not None:
2593
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2594
          max_time = mstat.estimated_time
2595
        else:
2596
          rem_time = "no time estimate"
2597
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2598
                        (instance.disks[i].iv_name, mstat.sync_percent,
2599
                         rem_time))
2600

    
2601
    # if we're done but degraded, let's do a few small retries, to
2602
    # make sure we see a stable and not transient situation; therefore
2603
    # we force restart of the loop
2604
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2605
      logging.info("Degraded disks found, %d retries left", degr_retries)
2606
      degr_retries -= 1
2607
      time.sleep(1)
2608
      continue
2609

    
2610
    if done or oneshot:
2611
      break
2612

    
2613
    time.sleep(min(60, max_time))
2614

    
2615
  if done:
2616
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2617
  return not cumul_degraded
2618

    
2619

    
2620
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2621
  """Check that mirrors are not degraded.
2622

2623
  The ldisk parameter, if True, will change the test from the
2624
  is_degraded attribute (which represents overall non-ok status for
2625
  the device(s)) to the ldisk (representing the local storage status).
2626

2627
  """
2628
  lu.cfg.SetDiskID(dev, node)
2629

    
2630
  result = True
2631

    
2632
  if on_primary or dev.AssembleOnSecondary():
2633
    rstats = lu.rpc.call_blockdev_find(node, dev)
2634
    msg = rstats.fail_msg
2635
    if msg:
2636
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2637
      result = False
2638
    elif not rstats.payload:
2639
      lu.LogWarning("Can't find disk on node %s", node)
2640
      result = False
2641
    else:
2642
      if ldisk:
2643
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2644
      else:
2645
        result = result and not rstats.payload.is_degraded
2646

    
2647
  if dev.children:
2648
    for child in dev.children:
2649
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2650

    
2651
  return result
2652

    
2653

    
2654
class LUDiagnoseOS(NoHooksLU):
2655
  """Logical unit for OS diagnose/query.
2656

2657
  """
2658
  _OP_REQP = ["output_fields", "names"]
2659
  REQ_BGL = False
2660
  _FIELDS_STATIC = utils.FieldSet()
2661
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2662
  # Fields that need calculation of global os validity
2663
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2664

    
2665
  def ExpandNames(self):
2666
    if self.op.names:
2667
      raise errors.OpPrereqError("Selective OS query not supported",
2668
                                 errors.ECODE_INVAL)
2669

    
2670
    _CheckOutputFields(static=self._FIELDS_STATIC,
2671
                       dynamic=self._FIELDS_DYNAMIC,
2672
                       selected=self.op.output_fields)
2673

    
2674
    # Lock all nodes, in shared mode
2675
    # Temporary removal of locks, should be reverted later
2676
    # TODO: reintroduce locks when they are lighter-weight
2677
    self.needed_locks = {}
2678
    #self.share_locks[locking.LEVEL_NODE] = 1
2679
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2680

    
2681
  def CheckPrereq(self):
2682
    """Check prerequisites.
2683

2684
    """
2685

    
2686
  @staticmethod
2687
  def _DiagnoseByOS(rlist):
2688
    """Remaps a per-node return list into an a per-os per-node dictionary
2689

2690
    @param rlist: a map with node names as keys and OS objects as values
2691

2692
    @rtype: dict
2693
    @return: a dictionary with osnames as keys and as value another map, with
2694
        nodes as keys and tuples of (path, status, diagnose, variants) as
        values, eg::
2695

2696
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2697
                                     (/srv/..., False, "invalid api")],
2698
                           "node2": [(/srv/..., True, "")]}
2699
          }
2700

2701
    """
2702
    all_os = {}
2703
    # we build here the list of nodes that didn't fail the RPC (at RPC
2704
    # level), so that nodes with a non-responding node daemon don't
2705
    # make all OSes invalid
2706
    good_nodes = [node_name for node_name in rlist
2707
                  if not rlist[node_name].fail_msg]
2708
    for node_name, nr in rlist.items():
2709
      if nr.fail_msg or not nr.payload:
2710
        continue
2711
      for name, path, status, diagnose, variants in nr.payload:
2712
        if name not in all_os:
2713
          # build a list of nodes for this os containing empty lists
2714
          # for each node in node_list
2715
          all_os[name] = {}
2716
          for nname in good_nodes:
2717
            all_os[name][nname] = []
2718
        all_os[name][node_name].append((path, status, diagnose, variants))
2719
    return all_os
2720

    
2721
  def Exec(self, feedback_fn):
2722
    """Compute the list of OSes.
2723

2724
    """
2725
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2726
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2727
    pol = self._DiagnoseByOS(node_data)
2728
    output = []
2729
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2730
    calc_variants = "variants" in self.op.output_fields
2731

    
2732
    for os_name, os_data in pol.items():
2733
      row = []
2734
      if calc_valid:
2735
        valid = True
2736
        variants = None
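        # an OS counts as valid only if it is present and valid on every node
        # that answered the RPC; variants end up as the intersection across
        # those nodes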
2737
        for osl in os_data.values():
2738
          valid = valid and osl and osl[0][1]
2739
          if not valid:
2740
            variants = None
2741
            break
2742
          if calc_variants:
2743
            node_variants = osl[0][3]
2744
            if variants is None:
2745
              variants = node_variants
2746
            else:
2747
              variants = [v for v in variants if v in node_variants]
2748

    
2749
      for field in self.op.output_fields:
2750
        if field == "name":
2751
          val = os_name
2752
        elif field == "valid":
2753
          val = valid
2754
        elif field == "node_status":
2755
          # this is just a copy of the dict
2756
          val = {}
2757
          for node_name, nos_list in os_data.items():
2758
            val[node_name] = nos_list
2759
        elif field == "variants":
2760
          val = variants
2761
        else:
2762
          raise errors.ParameterError(field)
2763
        row.append(val)
2764
      output.append(row)
2765

    
2766
    return output
2767

    
2768

    
2769
class LURemoveNode(LogicalUnit):
2770
  """Logical unit for removing a node.
2771

2772
  """
2773
  HPATH = "node-remove"
2774
  HTYPE = constants.HTYPE_NODE
2775
  _OP_REQP = ["node_name"]
2776

    
2777
  def BuildHooksEnv(self):
2778
    """Build hooks env.
2779

2780
    This doesn't run on the target node in the pre phase as a failed
2781
    node would then be impossible to remove.
2782

2783
    """
2784
    env = {
2785
      "OP_TARGET": self.op.node_name,
2786
      "NODE_NAME": self.op.node_name,
2787
      }
2788
    all_nodes = self.cfg.GetNodeList()
2789
    try:
2790
      all_nodes.remove(self.op.node_name)
2791
    except ValueError:
2792
      logging.warning("Node %s which is about to be removed not found"
2793
                      " in the all nodes list", self.op.node_name)
2794
    return env, all_nodes, all_nodes
2795

    
2796
  def CheckPrereq(self):
2797
    """Check prerequisites.
2798

2799
    This checks:
2800
     - the node exists in the configuration
2801
     - it does not have primary or secondary instances
2802
     - it's not the master
2803

2804
    Any errors are signaled by raising errors.OpPrereqError.
2805

2806
    """
2807
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2808
    node = self.cfg.GetNodeInfo(self.op.node_name)
2809
    assert node is not None
2810

    
2811
    instance_list = self.cfg.GetInstanceList()
2812

    
2813
    masternode = self.cfg.GetMasterNode()
2814
    if node.name == masternode:
2815
      raise errors.OpPrereqError("Node is the master node,"
2816
                                 " you need to failover first.",
2817
                                 errors.ECODE_INVAL)
2818

    
2819
    for instance_name in instance_list:
2820
      instance = self.cfg.GetInstanceInfo(instance_name)
2821
      if node.name in instance.all_nodes:
2822
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2823
                                   " please remove first." % instance_name,
2824
                                   errors.ECODE_INVAL)
2825
    self.op.node_name = node.name
2826
    self.node = node
2827

    
2828
  def Exec(self, feedback_fn):
2829
    """Removes the node from the cluster.
2830

2831
    """
2832
    node = self.node
2833
    logging.info("Stopping the node daemon and removing configs from node %s",
2834
                 node.name)
2835

    
2836
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2837

    
2838
    # Promote nodes to master candidate as needed
2839
    _AdjustCandidatePool(self, exceptions=[node.name])
2840
    self.context.RemoveNode(node.name)
2841

    
2842
    # Run post hooks on the node before it's removed
2843
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2844
    try:
2845
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2846
    except:
2847
      # pylint: disable-msg=W0702
2848
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2849

    
2850
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2851
    msg = result.fail_msg
2852
    if msg:
2853
      self.LogWarning("Errors encountered on the remote node while leaving"
2854
                      " the cluster: %s", msg)
2855

    
2856

    
2857
class LUQueryNodes(NoHooksLU):
2858
  """Logical unit for querying nodes.
2859

2860
  """
2861
  # pylint: disable-msg=W0142
2862
  _OP_REQP = ["output_fields", "names", "use_locking"]
2863
  REQ_BGL = False
2864

    
2865
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2866
                    "master_candidate", "offline", "drained"]
2867

    
2868
  _FIELDS_DYNAMIC = utils.FieldSet(
2869
    "dtotal", "dfree",
2870
    "mtotal", "mnode", "mfree",
2871
    "bootid",
2872
    "ctotal", "cnodes", "csockets",
2873
    )
2874

    
2875
  _FIELDS_STATIC = utils.FieldSet(*[
2876
    "pinst_cnt", "sinst_cnt",
2877
    "pinst_list", "sinst_list",
2878
    "pip", "sip", "tags",
2879
    "master",
2880
    "role"] + _SIMPLE_FIELDS
2881
    )
2882

    
2883
  def ExpandNames(self):
2884
    _CheckOutputFields(static=self._FIELDS_STATIC,
2885
                       dynamic=self._FIELDS_DYNAMIC,
2886
                       selected=self.op.output_fields)
2887

    
2888
    self.needed_locks = {}
2889
    self.share_locks[locking.LEVEL_NODE] = 1
2890

    
2891
    if self.op.names:
2892
      self.wanted = _GetWantedNodes(self, self.op.names)
2893
    else:
2894
      self.wanted = locking.ALL_SET
2895

    
2896
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2897
    self.do_locking = self.do_node_query and self.op.use_locking
2898
    if self.do_locking:
2899
      # if we don't request only static fields, we need to lock the nodes
2900
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2901

    
2902
  def CheckPrereq(self):
2903
    """Check prerequisites.
2904

2905
    """
2906
    # The validation of the node list is done in the _GetWantedNodes,
2907
    # if non empty, and if empty, there's no validation to do
2908
    pass
2909

    
2910
  def Exec(self, feedback_fn):
2911
    """Computes the list of nodes and their attributes.
2912

2913
    """
2914
    all_info = self.cfg.GetAllNodesInfo()
2915
    if self.do_locking:
2916
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2917
    elif self.wanted != locking.ALL_SET:
2918
      nodenames = self.wanted
2919
      missing = set(nodenames).difference(all_info.keys())
2920
      if missing:
2921
        raise errors.OpExecError(
2922
          "Some nodes were removed before retrieving their data: %s" % missing)
2923
    else:
2924
      nodenames = all_info.keys()
2925

    
2926
    nodenames = utils.NiceSort(nodenames)
2927
    nodelist = [all_info[name] for name in nodenames]
2928

    
2929
    # begin data gathering
2930

    
2931
    if self.do_node_query:
2932
      live_data = {}
2933
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2934
                                          self.cfg.GetHypervisorType())
2935
      for name in nodenames:
2936
        nodeinfo = node_data[name]
2937
        if not nodeinfo.fail_msg and nodeinfo.payload:
2938
          nodeinfo = nodeinfo.payload
2939
          fn = utils.TryConvert
2940
          live_data[name] = {
2941
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2942
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2943
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2944
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2945
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2946
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2947
            "bootid": nodeinfo.get('bootid', None),
2948
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2949
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2950
            }
2951
        else:
2952
          live_data[name] = {}
2953
    else:
2954
      live_data = dict.fromkeys(nodenames, {})
2955

    
2956
    node_to_primary = dict([(name, set()) for name in nodenames])
2957
    node_to_secondary = dict([(name, set()) for name in nodenames])
2958

    
2959
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2960
                             "sinst_cnt", "sinst_list"))
2961
    if inst_fields & frozenset(self.op.output_fields):
2962
      inst_data = self.cfg.GetAllInstancesInfo()
2963

    
2964
      for inst in inst_data.values():
2965
        if inst.primary_node in node_to_primary:
2966
          node_to_primary[inst.primary_node].add(inst.name)
2967
        for secnode in inst.secondary_nodes:
2968
          if secnode in node_to_secondary:
2969
            node_to_secondary[secnode].add(inst.name)
2970

    
2971
    master_node = self.cfg.GetMasterNode()
2972

    
2973
    # end data gathering
2974

    
2975
    output = []
2976
    for node in nodelist:
2977
      node_output = []
2978
      for field in self.op.output_fields:
2979
        if field in self._SIMPLE_FIELDS:
2980
          val = getattr(node, field)
2981
        elif field == "pinst_list":
2982
          val = list(node_to_primary[node.name])
2983
        elif field == "sinst_list":
2984
          val = list(node_to_secondary[node.name])
2985
        elif field == "pinst_cnt":
2986
          val = len(node_to_primary[node.name])
2987
        elif field == "sinst_cnt":
2988
          val = len(node_to_secondary[node.name])
2989
        elif field == "pip":
2990
          val = node.primary_ip
2991
        elif field == "sip":
2992
          val = node.secondary_ip
2993
        elif field == "tags":
2994
          val = list(node.GetTags())
2995
        elif field == "master":
2996
          val = node.name == master_node
2997
        elif self._FIELDS_DYNAMIC.Matches(field):
2998
          val = live_data[node.name].get(field, None)
2999
        elif field == "role":
3000
          if node.name == master_node:
3001
            val = "M"
3002
          elif node.master_candidate:
3003
            val = "C"
3004
          elif node.drained:
3005
            val = "D"
3006
          elif node.offline:
3007
            val = "O"
3008
          else:
3009
            val = "R"
3010
        else:
3011
          raise errors.ParameterError(field)
3012
        node_output.append(val)
3013
      output.append(node_output)
3014

    
3015
    return output
3016
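# Editorial note (summary of the "role" field computed above; not part of the
# original module): the single-letter codes returned for "role" are
#   M - master node, C - master candidate, D - drained, O - offline,
#   R - regular node (none of the above).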

    
3017

    
3018
class LUQueryNodeVolumes(NoHooksLU):
3019
  """Logical unit for getting volumes on node(s).
3020

3021
  """
3022
  _OP_REQP = ["nodes", "output_fields"]
3023
  REQ_BGL = False
3024
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3025
  _FIELDS_STATIC = utils.FieldSet("node")
3026

    
3027
  def ExpandNames(self):
3028
    _CheckOutputFields(static=self._FIELDS_STATIC,
3029
                       dynamic=self._FIELDS_DYNAMIC,
3030
                       selected=self.op.output_fields)
3031

    
3032
    self.needed_locks = {}
3033
    self.share_locks[locking.LEVEL_NODE] = 1
3034
    if not self.op.nodes:
3035
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3036
    else:
3037
      self.needed_locks[locking.LEVEL_NODE] = \
3038
        _GetWantedNodes(self, self.op.nodes)
3039

    
3040
  def CheckPrereq(self):
3041
    """Check prerequisites.
3042

3043
    This checks that the fields required are valid output fields.
3044

3045
    """
3046
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3047

    
3048
  def Exec(self, feedback_fn):
3049
    """Computes the list of nodes and their attributes.
3050

3051
    """
3052
    nodenames = self.nodes
3053
    volumes = self.rpc.call_node_volumes(nodenames)
3054

    
3055
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3056
             in self.cfg.GetInstanceList()]
3057

    
3058
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3059

    
3060
    output = []
3061
    for node in nodenames:
3062
      nresult = volumes[node]
3063
      if nresult.offline:
3064
        continue
3065
      msg = nresult.fail_msg
3066
      if msg:
3067
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3068
        continue
3069

    
3070
      node_vols = nresult.payload[:]
3071
      node_vols.sort(key=lambda vol: vol['dev'])
3072

    
3073
      for vol in node_vols:
3074
        node_output = []
3075
        for field in self.op.output_fields:
3076
          if field == "node":
3077
            val = node
3078
          elif field == "phys":
3079
            val = vol['dev']
3080
          elif field == "vg":
3081
            val = vol['vg']
3082
          elif field == "name":
3083
            val = vol['name']
3084
          elif field == "size":
3085
            val = int(float(vol['size']))
3086
          elif field == "instance":
3087
            for inst in ilist:
3088
              if node not in lv_by_node[inst]:
3089
                continue
3090
              if vol['name'] in lv_by_node[inst][node]:
3091
                val = inst.name
3092
                break
3093
            else:
3094
              val = '-'
3095
          else:
3096
            raise errors.ParameterError(field)
3097
          node_output.append(str(val))
3098

    
3099
        output.append(node_output)
3100

    
3101
    return output
3102

    
3103

    
3104
class LUQueryNodeStorage(NoHooksLU):
3105
  """Logical unit for getting information on storage units on node(s).
3106

3107
  """
3108
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
3109
  REQ_BGL = False
3110
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3111

    
3112
  def CheckArguments(self):
3113
    _CheckStorageType(self.op.storage_type)
3114

    
3115
    _CheckOutputFields(static=self._FIELDS_STATIC,
3116
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3117
                       selected=self.op.output_fields)
3118

    
3119
  def ExpandNames(self):
3120
    self.needed_locks = {}
3121
    self.share_locks[locking.LEVEL_NODE] = 1
3122

    
3123
    if self.op.nodes:
3124
      self.needed_locks[locking.LEVEL_NODE] = \
3125
        _GetWantedNodes(self, self.op.nodes)
3126
    else:
3127
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3128

    
3129
  def CheckPrereq(self):
3130
    """Check prerequisites.
3131

3132
    This checks that the fields required are valid output fields.
3133

3134
    """
3135
    self.op.name = getattr(self.op, "name", None)
3136

    
3137
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3138

    
3139
  def Exec(self, feedback_fn):
3140
    """Computes the list of nodes and their attributes.
3141

3142
    """
3143
    # Always get name to sort by
3144
    if constants.SF_NAME in self.op.output_fields:
3145
      fields = self.op.output_fields[:]
3146
    else:
3147
      fields = [constants.SF_NAME] + self.op.output_fields
3148

    
3149
    # Never ask for node or type as it's only known to the LU
3150
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3151
      while extra in fields:
3152
        fields.remove(extra)
3153

    
3154
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3155
    name_idx = field_idx[constants.SF_NAME]
3156

    
3157
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3158
    data = self.rpc.call_storage_list(self.nodes,
3159
                                      self.op.storage_type, st_args,
3160
                                      self.op.name, fields)
3161

    
3162
    result = []
3163

    
3164
    for node in utils.NiceSort(self.nodes):
3165
      nresult = data[node]
3166
      if nresult.offline:
3167
        continue
3168

    
3169
      msg = nresult.fail_msg
3170
      if msg:
3171
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3172
        continue
3173

    
3174
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3175

    
3176
      for name in utils.NiceSort(rows.keys()):
3177
        row = rows[name]
3178

    
3179
        out = []
3180

    
3181
        for field in self.op.output_fields:
3182
          if field == constants.SF_NODE:
3183
            val = node
3184
          elif field == constants.SF_TYPE:
3185
            val = self.op.storage_type
3186
          elif field in field_idx:
3187
            val = row[field_idx[field]]
3188
          else:
3189
            raise errors.ParameterError(field)
3190

    
3191
          out.append(val)
3192

    
3193
        result.append(out)
3194

    
3195
    return result
3196

    
3197

    
3198
class LUModifyNodeStorage(NoHooksLU):
3199
  """Logical unit for modifying a storage volume on a node.
3200

3201
  """
3202
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3203
  REQ_BGL = False
3204

    
3205
  def CheckArguments(self):
3206
    self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3207

    
3208
    _CheckStorageType(self.op.storage_type)
3209

    
3210
  def ExpandNames(self):
3211
    self.needed_locks = {
3212
      locking.LEVEL_NODE: self.op.node_name,
3213
      }
3214

    
3215
  def CheckPrereq(self):
3216
    """Check prerequisites.
3217

3218
    """
3219
    storage_type = self.op.storage_type
3220

    
3221
    try:
3222
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3223
    except KeyError:
3224
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3225
                                 " modified" % storage_type,
3226
                                 errors.ECODE_INVAL)
3227

    
3228
    diff = set(self.op.changes.keys()) - modifiable
3229
    if diff:
3230
      raise errors.OpPrereqError("The following fields can not be modified for"
3231
                                 " storage units of type '%s': %r" %
3232
                                 (storage_type, list(diff)),
3233
                                 errors.ECODE_INVAL)
3234

    
3235
  def Exec(self, feedback_fn):
3236
    """Computes the list of nodes and their attributes.
3237

3238
    """
3239
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3240
    result = self.rpc.call_storage_modify(self.op.node_name,
3241
                                          self.op.storage_type, st_args,
3242
                                          self.op.name, self.op.changes)
3243
    result.Raise("Failed to modify storage unit '%s' on %s" %
3244
                 (self.op.name, self.op.node_name))
3245
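# Editorial usage sketch (illustration only, not part of the original module):
# for LVM physical volumes the only field expected to be modifiable is the
# allocatable flag, so a typical opcode would carry something like
#   storage_type=constants.ST_LVM_PV, name="/dev/sda3",
#   changes={constants.SF_ALLOCATABLE: False}
# (ST_LVM_PV and SF_ALLOCATABLE are assumed to exist in the constants module;
# MODIFIABLE_STORAGE_FIELDS above is the authoritative source.)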

    
3246

    
3247
class LUAddNode(LogicalUnit):
3248
  """Logical unit for adding node to the cluster.
3249

3250
  """
3251
  HPATH = "node-add"
3252
  HTYPE = constants.HTYPE_NODE
3253
  _OP_REQP = ["node_name"]
3254

    
3255
  def CheckArguments(self):
3256
    # validate/normalize the node name
3257
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3258

    
3259
  def BuildHooksEnv(self):
3260
    """Build hooks env.
3261

3262
    This will run on all nodes before, and on all nodes + the new node after.
3263

3264
    """
3265
    env = {
3266
      "OP_TARGET": self.op.node_name,
3267
      "NODE_NAME": self.op.node_name,
3268
      "NODE_PIP": self.op.primary_ip,
3269
      "NODE_SIP": self.op.secondary_ip,
3270
      }
3271
    nodes_0 = self.cfg.GetNodeList()
3272
    nodes_1 = nodes_0 + [self.op.node_name, ]
3273
    return env, nodes_0, nodes_1
3274

    
3275
  def CheckPrereq(self):
3276
    """Check prerequisites.
3277

3278
    This checks:
3279
     - the new node is not already in the config
3280
     - it is resolvable
3281
     - its parameters (single/dual homed) matches the cluster
3282

3283
    Any errors are signaled by raising errors.OpPrereqError.
3284

3285
    """
3286
    node_name = self.op.node_name
3287
    cfg = self.cfg
3288

    
3289
    dns_data = utils.GetHostInfo(node_name)
3290

    
3291
    node = dns_data.name
3292
    primary_ip = self.op.primary_ip = dns_data.ip
3293
    secondary_ip = getattr(self.op, "secondary_ip", None)
3294
    if secondary_ip is None:
3295
      secondary_ip = primary_ip
3296
    if not utils.IsValidIP(secondary_ip):
3297
      raise errors.OpPrereqError("Invalid secondary IP given",
3298
                                 errors.ECODE_INVAL)
3299
    self.op.secondary_ip = secondary_ip
3300

    
3301
    node_list = cfg.GetNodeList()
3302
    if not self.op.readd and node in node_list:
3303
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3304
                                 node, errors.ECODE_EXISTS)
3305
    elif self.op.readd and node not in node_list:
3306
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3307
                                 errors.ECODE_NOENT)
3308

    
3309
    for existing_node_name in node_list:
3310
      existing_node = cfg.GetNodeInfo(existing_node_name)
3311

    
3312
      if self.op.readd and node == existing_node_name:
3313
        if (existing_node.primary_ip != primary_ip or
3314
            existing_node.secondary_ip != secondary_ip):
3315
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3316
                                     " address configuration as before",
3317
                                     errors.ECODE_INVAL)
3318
        continue
3319

    
3320
      if (existing_node.primary_ip == primary_ip or
3321
          existing_node.secondary_ip == primary_ip or
3322
          existing_node.primary_ip == secondary_ip or
3323
          existing_node.secondary_ip == secondary_ip):
3324
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3325
                                   " existing node %s" % existing_node.name,
3326
                                   errors.ECODE_NOTUNIQUE)
3327

    
3328
    # check that the type of the node (single versus dual homed) is the
3329
    # same as for the master
3330
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3331
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3332
    newbie_singlehomed = secondary_ip == primary_ip
3333
    if master_singlehomed != newbie_singlehomed:
3334
      if master_singlehomed:
3335
        raise errors.OpPrereqError("The master has no private ip but the"
3336
                                   " new node has one",
3337
                                   errors.ECODE_INVAL)
3338
      else:
3339
        raise errors.OpPrereqError("The master has a private ip but the"
3340
                                   " new node doesn't have one",
3341
                                   errors.ECODE_INVAL)
3342

    
3343
    # checks reachability
3344
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3345
      raise errors.OpPrereqError("Node not reachable by ping",
3346
                                 errors.ECODE_ENVIRON)
3347

    
3348
    if not newbie_singlehomed:
3349
      # check reachability from my secondary ip to newbie's secondary ip
3350
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3351
                           source=myself.secondary_ip):
3352
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3353
                                   " based ping to noded port",
3354
                                   errors.ECODE_ENVIRON)
3355

    
3356
    if self.op.readd:
3357
      exceptions = [node]
3358
    else:
3359
      exceptions = []
3360

    
3361
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3362

    
3363
    if self.op.readd:
3364
      self.new_node = self.cfg.GetNodeInfo(node)
3365
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3366
    else:
3367
      self.new_node = objects.Node(name=node,
3368
                                   primary_ip=primary_ip,
3369
                                   secondary_ip=secondary_ip,
3370
                                   master_candidate=self.master_candidate,
3371
                                   offline=False, drained=False)
3372

    
3373
  def Exec(self, feedback_fn):
3374
    """Adds the new node to the cluster.
3375

3376
    """
3377
    new_node = self.new_node
3378
    node = new_node.name
3379

    
3380
    # for re-adds, reset the offline/drained/master-candidate flags;
3381
    # we need to reset here, otherwise offline would prevent RPC calls
3382
    # later in the procedure; this also means that if the re-add
3383
    # fails, we are left with a non-offlined, broken node
3384
    if self.op.readd:
3385
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3386
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3387
      # if we demote the node, we do cleanup later in the procedure
3388
      new_node.master_candidate = self.master_candidate
3389

    
3390
    # notify the user about any possible mc promotion
3391
    if new_node.master_candidate:
3392
      self.LogInfo("Node will be a master candidate")
3393

    
3394
    # check connectivity
3395
    result = self.rpc.call_version([node])[node]
3396
    result.Raise("Can't get version information from node %s" % node)
3397
    if constants.PROTOCOL_VERSION == result.payload:
3398
      logging.info("Communication to node %s fine, sw version %s match",
3399
                   node, result.payload)
3400
    else:
3401
      raise errors.OpExecError("Version mismatch master version %s,"
3402
                               " node version %s" %
3403
                               (constants.PROTOCOL_VERSION, result.payload))
3404

    
3405
    # setup ssh on node
3406
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3407
      logging.info("Copy ssh key to node %s", node)
3408
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3409
      keyarray = []
3410
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3411
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3412
                  priv_key, pub_key]
3413

    
3414
      for i in keyfiles:
3415
        keyarray.append(utils.ReadFile(i))
3416

    
3417
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3418
                                      keyarray[2], keyarray[3], keyarray[4],
3419
                                      keyarray[5])
3420
      result.Raise("Cannot transfer ssh keys to the new node")
3421

    
3422
    # Add node to our /etc/hosts, and add key to known_hosts
3423
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3424
      utils.AddHostToEtcHosts(new_node.name)
3425

    
3426
    if new_node.secondary_ip != new_node.primary_ip:
3427
      result = self.rpc.call_node_has_ip_address(new_node.name,
3428
                                                 new_node.secondary_ip)
3429
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3430
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3431
      if not result.payload:
3432
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3433
                                 " you gave (%s). Please fix and re-run this"
3434
                                 " command." % new_node.secondary_ip)
3435

    
3436
    node_verify_list = [self.cfg.GetMasterNode()]
3437
    node_verify_param = {
3438
      constants.NV_NODELIST: [node],
3439
      # TODO: do a node-net-test as well?
3440
    }
3441

    
3442
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3443
                                       self.cfg.GetClusterName())
3444
    for verifier in node_verify_list:
3445
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3446
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3447
      if nl_payload:
3448
        for failed in nl_payload:
3449
          feedback_fn("ssh/hostname verification failed"
3450
                      " (checking from %s): %s" %
3451
                      (verifier, nl_payload[failed]))
3452
        raise errors.OpExecError("ssh/hostname verification failed.")
3453

    
3454
    if self.op.readd:
3455
      _RedistributeAncillaryFiles(self)
3456
      self.context.ReaddNode(new_node)
3457
      # make sure we redistribute the config
3458
      self.cfg.Update(new_node, feedback_fn)
3459
      # and make sure the new node will not have old files around
3460
      if not new_node.master_candidate:
3461
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3462
        msg = result.fail_msg
3463
        if msg:
3464
          self.LogWarning("Node failed to demote itself from master"
3465
                          " candidate status: %s" % msg)
3466
    else:
3467
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3468
      self.context.AddNode(new_node, self.proc.GetECId())
3469

    
3470

    
3471
class LUSetNodeParams(LogicalUnit):
3472
  """Modifies the parameters of a node.
3473

3474
  """
3475
  HPATH = "node-modify"
3476
  HTYPE = constants.HTYPE_NODE
3477
  _OP_REQP = ["node_name"]
3478
  REQ_BGL = False
3479

    
3480
  def CheckArguments(self):
3481
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3482
    _CheckBooleanOpField(self.op, 'master_candidate')
3483
    _CheckBooleanOpField(self.op, 'offline')
3484
    _CheckBooleanOpField(self.op, 'drained')
3485
    _CheckBooleanOpField(self.op, 'auto_promote')
3486
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3487
    if all_mods.count(None) == 3:
3488
      raise errors.OpPrereqError("Please pass at least one modification",
3489
                                 errors.ECODE_INVAL)
3490
    if all_mods.count(True) > 1:
3491
      raise errors.OpPrereqError("Can't set the node into more than one"
3492
                                 " state at the same time",
3493
                                 errors.ECODE_INVAL)
3494

    
3495
    # Boolean value that tells us whether we're offlining or draining the node
3496
    self.offline_or_drain = (self.op.offline == True or
3497
                             self.op.drained == True)
3498
    self.deoffline_or_drain = (self.op.offline == False or
3499
                               self.op.drained == False)
3500
    self.might_demote = (self.op.master_candidate == False or
3501
                         self.offline_or_drain)
3502

    
3503
    self.lock_all = self.op.auto_promote and self.might_demote
3504

    
3505

    
3506
  def ExpandNames(self):
3507
    if self.lock_all:
3508
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3509
    else:
3510
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3511

    
3512
  def BuildHooksEnv(self):
3513
    """Build hooks env.
3514

3515
    This runs on the master node.
3516

3517
    """
3518
    env = {
3519
      "OP_TARGET": self.op.node_name,
3520
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3521
      "OFFLINE": str(self.op.offline),
3522
      "DRAINED": str(self.op.drained),
3523
      }
3524
    nl = [self.cfg.GetMasterNode(),
3525
          self.op.node_name]
3526
    return env, nl, nl
3527

    
3528
  def CheckPrereq(self):
3529
    """Check prerequisites.
3530

3531
    This only checks the instance list against the existing names.
3532

3533
    """
3534
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3535

    
3536
    if (self.op.master_candidate is not None or
3537
        self.op.drained is not None or
3538
        self.op.offline is not None):
3539
      # we can't change the master's node flags
3540
      if self.op.node_name == self.cfg.GetMasterNode():
3541
        raise errors.OpPrereqError("The master role can be changed"
3542
                                   " only via masterfailover",
3543
                                   errors.ECODE_INVAL)
3544

    
3545

    
3546
    if node.master_candidate and self.might_demote and not self.lock_all:
3547
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3548
      # check if after removing the current node, we're missing master
3549
      # candidates
3550
      (mc_remaining, mc_should, _) = \
3551
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3552
      if mc_remaining < mc_should:
3553
        raise errors.OpPrereqError("Not enough master candidates, please"
3554
                                   " pass auto_promote to allow promotion",
3555
                                   errors.ECODE_INVAL)
3556

    
3557
    if (self.op.master_candidate == True and
3558
        ((node.offline and not self.op.offline == False) or
3559
         (node.drained and not self.op.drained == False))):
3560
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3561
                                 " to master_candidate" % node.name,
3562
                                 errors.ECODE_INVAL)
3563

    
3564
    # If we're being deofflined/drained, we'll MC ourself if needed
3565
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3566
        self.op.master_candidate == True and not node.master_candidate):
3567
      self.op.master_candidate = _DecideSelfPromotion(self)
3568
      if self.op.master_candidate:
3569
        self.LogInfo("Autopromoting node to master candidate")
3570

    
3571
    return
3572

    
3573
  def Exec(self, feedback_fn):
3574
    """Modifies a node.
3575

3576
    """
3577
    node = self.node
3578

    
3579
    result = []
3580
    changed_mc = False
3581

    
3582
    if self.op.offline is not None:
3583
      node.offline = self.op.offline
3584
      result.append(("offline", str(self.op.offline)))
3585
      if self.op.offline == True:
3586
        if node.master_candidate:
3587
          node.master_candidate = False
3588
          changed_mc = True
3589
          result.append(("master_candidate", "auto-demotion due to offline"))
3590
        if node.drained:
3591
          node.drained = False
3592
          result.append(("drained", "clear drained status due to offline"))
3593

    
3594
    if self.op.master_candidate is not None:
3595
      node.master_candidate = self.op.master_candidate
3596
      changed_mc = True
3597
      result.append(("master_candidate", str(self.op.master_candidate)))
3598
      if self.op.master_candidate == False:
3599
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3600
        msg = rrc.fail_msg
3601
        if msg:
3602
          self.LogWarning("Node failed to demote itself: %s" % msg)
3603

    
3604
    if self.op.drained is not None:
3605
      node.drained = self.op.drained
3606
      result.append(("drained", str(self.op.drained)))
3607
      if self.op.drained == True:
3608
        if node.master_candidate:
3609
          node.master_candidate = False
3610
          changed_mc = True
3611
          result.append(("master_candidate", "auto-demotion due to drain"))
3612
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3613
          msg = rrc.fail_msg
3614
          if msg:
3615
            self.LogWarning("Node failed to demote itself: %s" % msg)
3616
        if node.offline:
3617
          node.offline = False
3618
          result.append(("offline", "clear offline status due to drain"))
3619

    
3620
    # we locked all nodes, we adjust the CP before updating this node
3621
    if self.lock_all:
3622
      _AdjustCandidatePool(self, [node.name])
3623

    
3624
    # this will trigger configuration file update, if needed
3625
    self.cfg.Update(node, feedback_fn)
3626

    
3627
    # this will trigger job queue propagation or cleanup
3628
    if changed_mc:
3629
      self.context.ReaddNode(node)
3630

    
3631
    return result
3632
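# Editorial note (illustration only, not part of the original module): the
# list returned by Exec above is a sequence of (parameter, value-or-reason)
# pairs, for example
#   [("offline", "True"),
#    ("master_candidate", "auto-demotion due to offline")]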

    
3633

    
3634
class LUPowercycleNode(NoHooksLU):
3635
  """Powercycles a node.
3636

3637
  """
3638
  _OP_REQP = ["node_name", "force"]
3639
  REQ_BGL = False
3640

    
3641
  def CheckArguments(self):
3642
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3643
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3644
      raise errors.OpPrereqError("The node is the master and the force"
3645
                                 " parameter was not set",
3646
                                 errors.ECODE_INVAL)
3647

    
3648
  def ExpandNames(self):
3649
    """Locking for PowercycleNode.
3650

3651
    This is a last-resort option and shouldn't block on other
3652
    jobs. Therefore, we grab no locks.
3653

3654
    """
3655
    self.needed_locks = {}
3656

    
3657
  def CheckPrereq(self):
3658
    """Check prerequisites.
3659

3660
    This LU has no prereqs.
3661

3662
    """
3663
    pass
3664

    
3665
  def Exec(self, feedback_fn):
3666
    """Reboots a node.
3667

3668
    """
3669
    result = self.rpc.call_node_powercycle(self.op.node_name,
3670
                                           self.cfg.GetHypervisorType())
3671
    result.Raise("Failed to schedule the reboot")
3672
    return result.payload
3673

    
3674

    
3675
class LUQueryClusterInfo(NoHooksLU):
3676
  """Query cluster configuration.
3677

3678
  """
3679
  _OP_REQP = []
3680
  REQ_BGL = False
3681

    
3682
  def ExpandNames(self):
3683
    self.needed_locks = {}
3684

    
3685
  def CheckPrereq(self):
3686
    """No prerequsites needed for this LU.
3687

3688
    """
3689
    pass
3690

    
3691
  def Exec(self, feedback_fn):
3692
    """Return cluster config.
3693

3694
    """
3695
    cluster = self.cfg.GetClusterInfo()
3696
    os_hvp = {}
3697

    
3698
    # Filter just for enabled hypervisors
3699
    for os_name, hv_dict in cluster.os_hvp.items():
3700
      os_hvp[os_name] = {}
3701
      for hv_name, hv_params in hv_dict.items():
3702
        if hv_name in cluster.enabled_hypervisors:
3703
          os_hvp[os_name][hv_name] = hv_params
3704

    
3705
    result = {
3706
      "software_version": constants.RELEASE_VERSION,
3707
      "protocol_version": constants.PROTOCOL_VERSION,
3708
      "config_version": constants.CONFIG_VERSION,
3709
      "os_api_version": max(constants.OS_API_VERSIONS),
3710
      "export_version": constants.EXPORT_VERSION,
3711
      "architecture": (platform.architecture()[0], platform.machine()),
3712
      "name": cluster.cluster_name,
3713
      "master": cluster.master_node,
3714
      "default_hypervisor": cluster.enabled_hypervisors[0],
3715
      "enabled_hypervisors": cluster.enabled_hypervisors,
3716
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3717
                        for hypervisor_name in cluster.enabled_hypervisors]),
3718
      "os_hvp": os_hvp,
3719
      "beparams": cluster.beparams,
3720
      "nicparams": cluster.nicparams,
3721
      "candidate_pool_size": cluster.candidate_pool_size,
3722
      "master_netdev": cluster.master_netdev,
3723
      "volume_group_name": cluster.volume_group_name,
3724
      "file_storage_dir": cluster.file_storage_dir,
3725
      "maintain_node_health": cluster.maintain_node_health,
3726
      "ctime": cluster.ctime,
3727
      "mtime": cluster.mtime,
3728
      "uuid": cluster.uuid,
3729
      "tags": list(cluster.GetTags()),
3730
      "uid_pool": cluster.uid_pool,
3731
      }
3732

    
3733
    return result
3734

    
3735

    
3736
class LUQueryConfigValues(NoHooksLU):
3737
  """Return configuration values.
3738

3739
  """
3740
  _OP_REQP = []
3741
  REQ_BGL = False
3742
  _FIELDS_DYNAMIC = utils.FieldSet()
3743
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3744
                                  "watcher_pause")
3745

    
3746
  def ExpandNames(self):
3747
    self.needed_locks = {}
3748

    
3749
    _CheckOutputFields(static=self._FIELDS_STATIC,
3750
                       dynamic=self._FIELDS_DYNAMIC,
3751
                       selected=self.op.output_fields)
3752

    
3753
  def CheckPrereq(self):
3754
    """No prerequisites.
3755

3756
    """
3757
    pass
3758

    
3759
  def Exec(self, feedback_fn):
3760
    """Dump a representation of the cluster config to the standard output.
3761

3762
    """
3763
    values = []
3764
    for field in self.op.output_fields:
3765
      if field == "cluster_name":
3766
        entry = self.cfg.GetClusterName()
3767
      elif field == "master_node":
3768
        entry = self.cfg.GetMasterNode()
3769
      elif field == "drain_flag":
3770
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3771
      elif field == "watcher_pause":
3772
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3773
      else:
3774
        raise errors.ParameterError(field)
3775
      values.append(entry)
3776
    return values
3777
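# Editorial usage sketch (illustration only, not part of the original module):
# requesting output_fields = ["cluster_name", "master_node", "drain_flag"]
# returns a positional list such as
#   ["cluster.example.com", "node1.example.com", False]
# (the host names are made-up placeholders).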

    
3778

    
3779
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before the handshake occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


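# Editorial usage sketch (illustration only, not part of the original module):
# callers of _AssembleInstanceDisks above unpack the (disks_ok, device_info)
# pair and the per-disk (node, iv_name, dev_path) triples, e.g.:
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s: disk %s assembled as %s" % (node, iv_name, dev_path))

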
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are only
  logged and do not cause a failed result; errors on any other node
  always do.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


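# Editorial note (illustration only, not part of the original module): callers
# of _ShutdownInstanceDisks above that already know the primary node is
# unreachable can ask for a best-effort shutdown, e.g.:
#
#   if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
#     self.LogWarning("Some disks failed to shut down cleanly")
#
# whereas _SafeShutdownInstanceDisks refuses to act on a running instance.

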
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks whether a given node has the needed amount of
  free memory. If the node has less memory, or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


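# Editorial usage sketch (illustration only; mirrors the call made from
# LUStartupInstance.CheckPrereq later in this file):
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
#
# where bep is the instance's filled beparams dict.

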
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks whether all given nodes have the needed amount
  of free disk. If any node has less disk, or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


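# Editorial usage sketch (illustration only, not part of the original module):
# a caller typically sums the sizes (in MiB) of the disks it is about to
# create and checks every involved node in one go, e.g.:
#
#   _CheckNodesFreeDisk(self, [pnode_name] + secondary_nodes, req_size)
#
# where pnode_name, secondary_nodes and req_size are placeholders computed by
# the calling LU.

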
class LUStartupInstance(LogicalUnit):
4062
  """Starts an instance.
4063

4064
  """
4065
  HPATH = "instance-start"
4066
  HTYPE = constants.HTYPE_INSTANCE
4067
  _OP_REQP = ["instance_name", "force"]
4068
  REQ_BGL = False
4069

    
4070
  def ExpandNames(self):
4071
    self._ExpandAndLockInstance()
4072

    
4073
  def BuildHooksEnv(self):
4074
    """Build hooks env.
4075

4076
    This runs on master, primary and secondary nodes of the instance.
4077

4078
    """
4079
    env = {
4080
      "FORCE": self.op.force,
4081
      }
4082
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4083
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4084
    return env, nl, nl
4085

    
4086
  def CheckPrereq(self):
4087
    """Check prerequisites.
4088

4089
    This checks that the instance is in the cluster.
4090

4091
    """
4092
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4093
    assert self.instance is not None, \
4094
      "Cannot retrieve locked instance %s" % self.op.instance_name
4095

    
4096
    # extra beparams
4097
    self.beparams = getattr(self.op, "beparams", {})
4098
    if self.beparams:
4099
      if not isinstance(self.beparams, dict):
4100
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4101
                                   " dict" % (type(self.beparams), ),
4102
                                   errors.ECODE_INVAL)
4103
      # fill the beparams dict
4104
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4105
      self.op.beparams = self.beparams
4106

    
4107
    # extra hvparams
4108
    self.hvparams = getattr(self.op, "hvparams", {})
4109
    if self.hvparams:
4110
      if not isinstance(self.hvparams, dict):
4111
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4112
                                   " dict" % (type(self.hvparams), ),
4113
                                   errors.ECODE_INVAL)
4114

    
4115
      # check hypervisor parameter syntax (locally)
4116
      cluster = self.cfg.GetClusterInfo()
4117
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4118
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4119
                                    instance.hvparams)
4120
      filled_hvp.update(self.hvparams)
4121
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4122
      hv_type.CheckParameterSyntax(filled_hvp)
4123
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4124
      self.op.hvparams = self.hvparams
4125

    
4126
    _CheckNodeOnline(self, instance.primary_node)
4127

    
4128
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4129
    # check bridges existence
4130
    _CheckInstanceBridgesExist(self, instance)
4131

    
4132
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4133
                                              instance.name,
4134
                                              instance.hypervisor)
4135
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4136
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4137
    if not remote_info.payload: # not running already
4138
      _CheckNodeFreeMemory(self, instance.primary_node,
4139
                           "starting instance %s" % instance.name,
4140
                           bep[constants.BE_MEMORY], instance.hypervisor)
4141

    
4142
  def Exec(self, feedback_fn):
4143
    """Start the instance.
4144

4145
    """
4146
    instance = self.instance
4147
    force = self.op.force
4148

    
4149
    self.cfg.MarkInstanceUp(instance.name)
4150

    
4151
    node_current = instance.primary_node
4152

    
4153
    _StartInstanceDisks(self, instance, force)
4154

    
4155
    result = self.rpc.call_instance_start(node_current, instance,
4156
                                          self.hvparams, self.beparams)
4157
    msg = result.fail_msg
4158
    if msg:
4159
      _ShutdownInstanceDisks(self, instance)
4160
      raise errors.OpExecError("Could not start instance: %s" % msg)
4161
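# Editorial usage sketch (illustration only, not part of the original module):
# the temporary overrides accepted by LUStartupInstance above are plain dicts
# validated against the cluster parameter types, e.g. a one-off start with
# more memory might pass
#   beparams={constants.BE_MEMORY: 512}
# The overrides are only handed to the start RPC; nothing is written back to
# the configuration here.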

    
4162

    
4163
class LURebootInstance(LogicalUnit):
4164
  """Reboot an instance.
4165

4166
  """
4167
  HPATH = "instance-reboot"
4168
  HTYPE = constants.HTYPE_INSTANCE
4169
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4170
  REQ_BGL = False
4171

    
4172
  def CheckArguments(self):
4173
    """Check the arguments.
4174

4175
    """
4176
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4177
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4178

    
4179
  def ExpandNames(self):
4180
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4181
                                   constants.INSTANCE_REBOOT_HARD,
4182
                                   constants.INSTANCE_REBOOT_FULL]:
4183
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4184
                                  (constants.INSTANCE_REBOOT_SOFT,
4185
                                   constants.INSTANCE_REBOOT_HARD,
4186
                                   constants.INSTANCE_REBOOT_FULL))
4187
    self._ExpandAndLockInstance()
4188

    
4189
  def BuildHooksEnv(self):
4190
    """Build hooks env.
4191

4192
    This runs on master, primary and secondary nodes of the instance.
4193

4194
    """
4195
    env = {
4196
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4197
      "REBOOT_TYPE": self.op.reboot_type,
4198
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4199
      }
4200
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4201
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4202
    return env, nl, nl
4203

    
4204
  def CheckPrereq(self):
4205
    """Check prerequisites.
4206

4207
    This checks that the instance is in the cluster.
4208

4209
    """
4210
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4211
    assert self.instance is not None, \
4212
      "Cannot retrieve locked instance %s" % self.op.instance_name
4213

    
4214
    _CheckNodeOnline(self, instance.primary_node)
4215

    
4216
    # check bridges existence
4217
    _CheckInstanceBridgesExist(self, instance)
4218

    
4219
  def Exec(self, feedback_fn):
4220
    """Reboot the instance.
4221

4222
    """
4223
    instance = self.instance
4224
    ignore_secondaries = self.op.ignore_secondaries
4225
    reboot_type = self.op.reboot_type
4226

    
4227
    node_current = instance.primary_node
4228

    
4229
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4230
                       constants.INSTANCE_REBOOT_HARD]:
4231
      for disk in instance.disks:
4232
        self.cfg.SetDiskID(disk, node_current)
4233
      result = self.rpc.call_instance_reboot(node_current, instance,
4234
                                             reboot_type,
4235
                                             self.shutdown_timeout)
4236
      result.Raise("Could not reboot instance")
4237
    else:
4238
      result = self.rpc.call_instance_shutdown(node_current, instance,
4239
                                               self.shutdown_timeout)
4240
      result.Raise("Could not shutdown instance for full reboot")
4241
      _ShutdownInstanceDisks(self, instance)
4242
      _StartInstanceDisks(self, instance, ignore_secondaries)
4243
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4244
      msg = result.fail_msg
4245
      if msg:
4246
        _ShutdownInstanceDisks(self, instance)
4247
        raise errors.OpExecError("Could not start instance for"
4248
                                 " full reboot: %s" % msg)
4249

    
4250
    self.cfg.MarkInstanceUp(instance.name)
4251
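# Editorial note (summary of LURebootInstance.Exec above, not part of the
# original module): soft and hard reboots are delegated to a single
# call_instance_reboot on the primary node, while a full reboot is emulated
# as shutdown -> disk deactivation -> disk activation -> start.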

    
4252

    
4253
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)


  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


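# Editor's note (not in the original source): LUQueryInstances below separates
# static fields (answered from the configuration) from dynamic ones (answered
# via RPC to the nodes); locking and per-node RPC queries are only performed
# when at least one non-static field was requested (see do_node_query and
# do_locking in ExpandNames).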
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


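# Editor's note (not in the original source): TLMigrateInstance below is the
# tasklet shared by LUMigrateInstance (a single instance) and LUMigrateNode
# (all primary instances of a node); the logical units only set up locks and
# hooks, while the tasklet performs the actual DRBD mode changes and the
# hypervisor migration.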
class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks on node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5718
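# Illustrative usage sketch (a comment added for clarity, not in the original
# source): _CreateDisks below drives this helper for every (node, disk) pair,
# forcing creation and opening only on the instance's primary node (pnode):
#
#   f_create = (node == pnode)
#   _CreateBlockDev(lu, node, instance, disk, f_create,
#                   _GetInstanceInfoText(instance), f_create)
#
# force_create flips to True as soon as a device reports CreateOnSecondary(),
# so such devices and their children are created on secondary nodes as well.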

    
5719

    
5720
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload
5748

    
5749

    
5750
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate logical volume names for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
5761
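# Usage sketch (illustrative, not part of the original source): the disk
# template code below calls this with one extension per disk, e.g.
#
#   names = _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#
# yielding names such as ["<uuid>.disk0", "<uuid>.disk1"], with one freshly
# generated unique id per extension.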

    
5762

    
5763
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
5782
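# Resulting layout (descriptive comment only): the returned disk is an
# LD_DRBD8 device whose children are two plain LVs -- a data LV of the
# requested size and a fixed 128 MB metadata LV -- identified by the tuple
# (primary, secondary, port, p_minor, s_minor, shared_secret).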

    
5783

    
5784
def _GenerateDiskTemplate(lu, template_name,
5785
                          instance_name, primary_node,
5786
                          secondary_nodes, disk_info,
5787
                          file_storage_dir, file_driver,
5788
                          base_index):
5789
  """Generate the entire disk layout for a given template type.
5790

5791
  """
5792
  #TODO: compute space requirements
5793

    
5794
  vgname = lu.cfg.GetVGName()
5795
  disk_count = len(disk_info)
5796
  disks = []
5797
  if template_name == constants.DT_DISKLESS:
5798
    pass
5799
  elif template_name == constants.DT_PLAIN:
5800
    if len(secondary_nodes) != 0:
5801
      raise errors.ProgrammerError("Wrong template configuration")
5802

    
5803
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5804
                                      for i in range(disk_count)])
5805
    for idx, disk in enumerate(disk_info):
5806
      disk_index = idx + base_index
5807
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5808
                              logical_id=(vgname, names[idx]),
5809
                              iv_name="disk/%d" % disk_index,
5810
                              mode=disk["mode"])
5811
      disks.append(disk_dev)
5812
  elif template_name == constants.DT_DRBD8:
5813
    if len(secondary_nodes) != 1:
5814
      raise errors.ProgrammerError("Wrong template configuration")
5815
    remote_node = secondary_nodes[0]
5816
    minors = lu.cfg.AllocateDRBDMinor(
5817
      [primary_node, remote_node] * len(disk_info), instance_name)
5818

    
5819
    names = []
5820
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5821
                                               for i in range(disk_count)]):
5822
      names.append(lv_prefix + "_data")
5823
      names.append(lv_prefix + "_meta")
5824
    for idx, disk in enumerate(disk_info):
5825
      disk_index = idx + base_index
5826
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5827
                                      disk["size"], names[idx*2:idx*2+2],
5828
                                      "disk/%d" % disk_index,
5829
                                      minors[idx*2], minors[idx*2+1])
5830
      disk_dev.mode = disk["mode"]
5831
      disks.append(disk_dev)
5832
  elif template_name == constants.DT_FILE:
5833
    if len(secondary_nodes) != 0:
5834
      raise errors.ProgrammerError("Wrong template configuration")
5835

    
5836
    _RequireFileStorage()
5837

    
5838
    for idx, disk in enumerate(disk_info):
5839
      disk_index = idx + base_index
5840
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5841
                              iv_name="disk/%d" % disk_index,
5842
                              logical_id=(file_driver,
5843
                                          "%s/disk%d" % (file_storage_dir,
5844
                                                         disk_index)),
5845
                              mode=disk["mode"])
5846
      disks.append(disk_dev)
5847
  else:
5848
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5849
  return disks
5850
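# Example (hedged sketch; instance and node names are invented): creating the
# disks for a two-disk DRBD8 instance roughly amounts to
#
#   disks = _GenerateDiskTemplate(self, constants.DT_DRBD8,
#                                 "inst1.example.com", "node1", ["node2"],
#                                 [{"size": 1024, "mode": "rw"},
#                                  {"size": 2048, "mode": "ro"}],
#                                 None, None, 0)
#
# which allocates two DRBD minors per disk and returns one DRBD8 disk object
# per entry, with iv_names "disk/0" and "disk/1".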

    
5851

    
5852
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
5857
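# Example: for an instance named "web1" this returns "originstname+web1";
# the creation code above attaches it to the devices as metadata (an LVM tag
# in the LV-based case).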

    
5858

    
5859
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5860
  """Create all disks for an instance.
5861

5862
  This abstracts away some work from AddInstance.
5863

5864
  @type lu: L{LogicalUnit}
5865
  @param lu: the logical unit on whose behalf we execute
5866
  @type instance: L{objects.Instance}
5867
  @param instance: the instance whose disks we should create
5868
  @type to_skip: list
5869
  @param to_skip: list of indices to skip
5870
  @type target_node: string
5871
  @param target_node: if passed, overrides the target node for creation
5872
  @rtype: boolean
5873
  @return: the success of the creation
5874

5875
  """
5876
  info = _GetInstanceInfoText(instance)
5877
  if target_node is None:
5878
    pnode = instance.primary_node
5879
    all_nodes = instance.all_nodes
5880
  else:
5881
    pnode = target_node
5882
    all_nodes = [pnode]
5883

    
5884
  if instance.disk_template == constants.DT_FILE:
5885
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5886
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5887

    
5888
    result.Raise("Failed to create directory '%s' on"
5889
                 " node %s" % (file_storage_dir, pnode))
5890

    
5891
  # Note: this needs to be kept in sync with adding of disks in
5892
  # LUSetInstanceParams
5893
  for idx, device in enumerate(instance.disks):
5894
    if to_skip and idx in to_skip:
5895
      continue
5896
    logging.info("Creating volume %s for instance %s",
5897
                 device.iv_name, instance.name)
5898
    #HARDCODE
5899
    for node in all_nodes:
5900
      f_create = node == pnode
5901
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5902
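# Note (descriptive comment, not in the original source): the loop above runs
# over all of the instance's nodes and leaves it to _CreateBlockDev to decide,
# via force_create and CreateOnSecondary(), which devices also need to be
# created on the secondary nodes.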

    
5903

    
5904
def _RemoveDisks(lu, instance, target_node=None):
5905
  """Remove all disks for an instance.
5906

5907
  This abstracts away some work from `AddInstance()` and
5908
  `RemoveInstance()`. Note that in case some of the devices couldn't
5909
  be removed, the removal will continue with the other ones (compare
5910
  with `_CreateDisks()`).
5911

5912
  @type lu: L{LogicalUnit}
5913
  @param lu: the logical unit on whose behalf we execute
5914
  @type instance: L{objects.Instance}
5915
  @param instance: the instance whose disks we should remove
5916
  @type target_node: string
5917
  @param target_node: used to override the node on which to remove the disks
5918
  @rtype: boolean
5919
  @return: the success of the removal
5920

5921
  """
5922
  logging.info("Removing block devices for instance %s", instance.name)
5923

    
5924
  all_result = True
5925
  for device in instance.disks:
5926
    if target_node:
5927
      edata = [(target_node, device)]
5928
    else:
5929
      edata = device.ComputeNodeTree(instance.primary_node)
5930
    for node, disk in edata:
5931
      lu.cfg.SetDiskID(disk, node)
5932
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5933
      if msg:
5934
        lu.LogWarning("Could not remove block device %s on node %s,"
5935
                      " continuing anyway: %s", device.iv_name, node, msg)
5936
        all_result = False
5937

    
5938
  if instance.disk_template == constants.DT_FILE:
5939
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5940
    if target_node:
5941
      tgt = target_node
5942
    else:
5943
      tgt = instance.primary_node
5944
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5945
    if result.fail_msg:
5946
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5947
                    file_storage_dir, instance.primary_node, result.fail_msg)
5948
      all_result = False
5949

    
5950
  return all_result
5951

    
5952

    
5953
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
5971
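# Worked example (illustrative only): for a DRBD8 instance with 1024 MB and
# 2048 MB disks,
#
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 2048}])
#
# returns (1024 + 128) + (2048 + 128) = 3328, i.e. each disk pays an extra
# 128 MB for its DRBD metadata.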

    
5972

    
5973
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5998
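# Usage sketch (hedged, mirroring the call made during instance creation
# below):
#
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)
#
# Offline nodes are skipped; any online node that rejects the parameters
# aborts the operation via info.Raise().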

    
5999

    
6000
class LUCreateInstance(LogicalUnit):
6001
  """Create an instance.
6002

6003
  """
6004
  HPATH = "instance-add"
6005
  HTYPE = constants.HTYPE_INSTANCE
6006
  _OP_REQP = ["instance_name", "disks",
6007
              "mode", "start",
6008
              "wait_for_sync", "ip_check", "nics",
6009
              "hvparams", "beparams"]
6010
  REQ_BGL = False
6011

    
6012
  def CheckArguments(self):
6013
    """Check arguments.
6014

6015
    """
6016
    # set optional parameters to none if they don't exist
6017
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
6018
                 "disk_template", "identify_defaults"]:
6019
      if not hasattr(self.op, attr):
6020
        setattr(self.op, attr, None)
6021

    
6022
    # do not require name_check to ease forward/backward compatibility
6023
    # for tools
6024
    if not hasattr(self.op, "name_check"):
6025
      self.op.name_check = True
6026
    if not hasattr(self.op, "no_install"):
6027
      self.op.no_install = False
6028
    if self.op.no_install and self.op.start:
6029
      self.LogInfo("No-installation mode selected, disabling startup")
6030
      self.op.start = False
6031
    # validate/normalize the instance name
6032
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6033
    if self.op.ip_check and not self.op.name_check:
6034
      # TODO: make the ip check more flexible and not depend on the name check
6035
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6036
                                 errors.ECODE_INVAL)
6037
    # check disk information: either all adopt, or no adopt
6038
    has_adopt = has_no_adopt = False
6039
    for disk in self.op.disks:
6040
      if "adopt" in disk:
6041
        has_adopt = True
6042
      else:
6043
        has_no_adopt = True
6044
    if has_adopt and has_no_adopt:
6045
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6046
                                 errors.ECODE_INVAL)
6047
    if has_adopt:
6048
      if self.op.disk_template != constants.DT_PLAIN:
6049
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6050
                                   " 'plain' disk template",
6051
                                   errors.ECODE_INVAL)
6052
      if self.op.iallocator is not None:
6053
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6054
                                   " iallocator script", errors.ECODE_INVAL)
6055
      if self.op.mode == constants.INSTANCE_IMPORT:
6056
        raise errors.OpPrereqError("Disk adoption not allowed for"
6057
                                   " instance import", errors.ECODE_INVAL)
6058

    
6059
    self.adopt_disks = has_adopt
6060

    
6061
    # verify creation mode
6062
    if self.op.mode not in (constants.INSTANCE_CREATE,
6063
                            constants.INSTANCE_IMPORT):
6064
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6065
                                 self.op.mode, errors.ECODE_INVAL)
6066

    
6067
    # instance name verification
6068
    if self.op.name_check:
6069
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6070
      self.op.instance_name = self.hostname1.name
6071
      # used in CheckPrereq for ip ping check
6072
      self.check_ip = self.hostname1.ip
6073
    else:
6074
      self.check_ip = None
6075

    
6076
    # file storage checks
6077
    if (self.op.file_driver and
6078
        not self.op.file_driver in constants.FILE_DRIVER):
6079
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6080
                                 self.op.file_driver, errors.ECODE_INVAL)
6081

    
6082
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6083
      raise errors.OpPrereqError("File storage directory path not absolute",
6084
                                 errors.ECODE_INVAL)
6085

    
6086
    ### Node/iallocator related checks
6087
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6088
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6089
                                 " node must be given",
6090
                                 errors.ECODE_INVAL)
6091

    
6092
    if self.op.mode == constants.INSTANCE_IMPORT:
6093
      # On import force_variant must be True, because if we forced it at
6094
      # initial install, our only chance when importing it back is that it
6095
      # works again!
6096
      self.op.force_variant = True
6097

    
6098
      if self.op.no_install:
6099
        self.LogInfo("No-installation mode has no effect during import")
6100

    
6101
    else: # INSTANCE_CREATE
6102
      if getattr(self.op, "os_type", None) is None:
6103
        raise errors.OpPrereqError("No guest OS specified",
6104
                                   errors.ECODE_INVAL)
6105
      self.op.force_variant = getattr(self.op, "force_variant", False)
6106
      if self.op.disk_template is None:
6107
        raise errors.OpPrereqError("No disk template specified",
6108
                                   errors.ECODE_INVAL)
6109

    
6110
  def ExpandNames(self):
6111
    """ExpandNames for CreateInstance.
6112

6113
    Figure out the right locks for instance creation.
6114

6115
    """
6116
    self.needed_locks = {}
6117

    
6118
    instance_name = self.op.instance_name
6119
    # this is just a preventive check, but someone might still add this
6120
    # instance in the meantime, and creation will fail at lock-add time
6121
    if instance_name in self.cfg.GetInstanceList():
6122
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6123
                                 instance_name, errors.ECODE_EXISTS)
6124

    
6125
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6126

    
6127
    if self.op.iallocator:
6128
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6129
    else:
6130
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6131
      nodelist = [self.op.pnode]
6132
      if self.op.snode is not None:
6133
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6134
        nodelist.append(self.op.snode)
6135
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6136

    
6137
    # in case of import lock the source node too
6138
    if self.op.mode == constants.INSTANCE_IMPORT:
6139
      src_node = getattr(self.op, "src_node", None)
6140
      src_path = getattr(self.op, "src_path", None)
6141

    
6142
      if src_path is None:
6143
        self.op.src_path = src_path = self.op.instance_name
6144

    
6145
      if src_node is None:
6146
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6147
        self.op.src_node = None
6148
        if os.path.isabs(src_path):
6149
          raise errors.OpPrereqError("Importing an instance from an absolute"
6150
                                     " path requires a source node option.",
6151
                                     errors.ECODE_INVAL)
6152
      else:
6153
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6154
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6155
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6156
        if not os.path.isabs(src_path):
6157
          self.op.src_path = src_path = \
6158
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6159

    
6160
  def _RunAllocator(self):
6161
    """Run the allocator based on input opcode.
6162

6163
    """
6164
    nics = [n.ToDict() for n in self.nics]
6165
    ial = IAllocator(self.cfg, self.rpc,
6166
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6167
                     name=self.op.instance_name,
6168
                     disk_template=self.op.disk_template,
6169
                     tags=[],
6170
                     os=self.op.os_type,
6171
                     vcpus=self.be_full[constants.BE_VCPUS],
6172
                     mem_size=self.be_full[constants.BE_MEMORY],
6173
                     disks=self.disks,
6174
                     nics=nics,
6175
                     hypervisor=self.op.hypervisor,
6176
                     )
6177

    
6178
    ial.Run(self.op.iallocator)
6179

    
6180
    if not ial.success:
6181
      raise errors.OpPrereqError("Can't compute nodes using"
6182
                                 " iallocator '%s': %s" %
6183
                                 (self.op.iallocator, ial.info),
6184
                                 errors.ECODE_NORES)
6185
    if len(ial.result) != ial.required_nodes:
6186
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6187
                                 " of nodes (%s), required %s" %
6188
                                 (self.op.iallocator, len(ial.result),
6189
                                  ial.required_nodes), errors.ECODE_FAULT)
6190
    self.op.pnode = ial.result[0]
6191
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6192
                 self.op.instance_name, self.op.iallocator,
6193
                 utils.CommaJoin(ial.result))
6194
    if ial.required_nodes == 2:
6195
      self.op.snode = ial.result[1]
6196
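  # Illustrative note (not in the original source): for a DRBD8 instance the
  # allocator must return two nodes, e.g. ial.result == ["node1", "node2"],
  # which become self.op.pnode and self.op.snode; templates without a mirror
  # only need ial.required_nodes == 1 and fill in the primary node alone.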

    
6197
  def BuildHooksEnv(self):
6198
    """Build hooks env.
6199

6200
    This runs on master, primary and secondary nodes of the instance.
6201

6202
    """
6203
    env = {
6204
      "ADD_MODE": self.op.mode,
6205
      }
6206
    if self.op.mode == constants.INSTANCE_IMPORT:
6207
      env["SRC_NODE"] = self.op.src_node
6208
      env["SRC_PATH"] = self.op.src_path
6209
      env["SRC_IMAGES"] = self.src_images
6210

    
6211
    env.update(_BuildInstanceHookEnv(
6212
      name=self.op.instance_name,
6213
      primary_node=self.op.pnode,
6214
      secondary_nodes=self.secondaries,
6215
      status=self.op.start,
6216
      os_type=self.op.os_type,
6217
      memory=self.be_full[constants.BE_MEMORY],
6218
      vcpus=self.be_full[constants.BE_VCPUS],
6219
      nics=_NICListToTuple(self, self.nics),
6220
      disk_template=self.op.disk_template,
6221
      disks=[(d["size"], d["mode"]) for d in self.disks],
6222
      bep=self.be_full,
6223
      hvp=self.hv_full,
6224
      hypervisor_name=self.op.hypervisor,
6225
    ))
6226

    
6227
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6228
          self.secondaries)
6229
    return env, nl, nl
6230

    
6231
  def _ReadExportInfo(self):
6232
    """Reads the export information from disk.
6233

6234
    It will override the opcode source node and path with the actual
6235
    information, if these two were not specified before.
6236

6237
    @return: the export information
6238

6239
    """
6240
    assert self.op.mode == constants.INSTANCE_IMPORT
6241

    
6242
    src_node = self.op.src_node
6243
    src_path = self.op.src_path
6244

    
6245
    if src_node is None:
6246
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6247
      exp_list = self.rpc.call_export_list(locked_nodes)
6248
      found = False
6249
      for node in exp_list:
6250
        if exp_list[node].fail_msg:
6251
          continue
6252
        if src_path in exp_list[node].payload:
6253
          found = True
6254
          self.op.src_node = src_node = node
6255
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6256
                                                       src_path)
6257
          break
6258
      if not found:
6259
        raise errors.OpPrereqError("No export found for relative path %s" %
6260
                                    src_path, errors.ECODE_INVAL)
6261

    
6262
    _CheckNodeOnline(self, src_node)
6263
    result = self.rpc.call_export_info(src_node, src_path)
6264
    result.Raise("No export or invalid export found in dir %s" % src_path)
6265

    
6266
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6267
    if not export_info.has_section(constants.INISECT_EXP):
6268
      raise errors.ProgrammerError("Corrupted export config",
6269
                                   errors.ECODE_ENVIRON)
6270

    
6271
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6272
    if (int(ei_version) != constants.EXPORT_VERSION):
6273
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6274
                                 (ei_version, constants.EXPORT_VERSION),
6275
                                 errors.ECODE_ENVIRON)
6276
    return export_info
6277

    
6278
  def _ReadExportParams(self, einfo):
6279
    """Use export parameters as defaults.
6280

6281
    In case the opcode doesn't specify (as in override) some instance
6282
    parameters, then try to use them from the export information, if
6283
    that declares them.
6284

6285
    """
6286
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6287

    
6288
    if self.op.disk_template is None:
6289
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6290
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6291
                                          "disk_template")
6292
      else:
6293
        raise errors.OpPrereqError("No disk template specified and the export"
6294
                                   " is missing the disk_template information",
6295
                                   errors.ECODE_INVAL)
6296

    
6297
    if not self.op.disks:
6298
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6299
        disks = []
6300
        # TODO: import the disk iv_name too
6301
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6302
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6303
          disks.append({"size": disk_sz})
6304
        self.op.disks = disks
6305
      else:
6306
        raise errors.OpPrereqError("No disk info specified and the export"
6307
                                   " is missing the disk information",
6308
                                   errors.ECODE_INVAL)
6309

    
6310
    if (not self.op.nics and
6311
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6312
      nics = []
6313
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6314
        ndict = {}
6315
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6316
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6317
          ndict[name] = v
6318
        nics.append(ndict)
6319
      self.op.nics = nics
6320

    
6321
    if (self.op.hypervisor is None and
6322
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6323
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6324
    if einfo.has_section(constants.INISECT_HYP):
6325
      # use the export parameters but do not override the ones
6326
      # specified by the user
6327
      for name, value in einfo.items(constants.INISECT_HYP):
6328
        if name not in self.op.hvparams:
6329
          self.op.hvparams[name] = value
6330

    
6331
    if einfo.has_section(constants.INISECT_BEP):
6332
      # use the parameters, without overriding
6333
      for name, value in einfo.items(constants.INISECT_BEP):
6334
        if name not in self.op.beparams:
6335
          self.op.beparams[name] = value
6336
    else:
6337
      # try to read the parameters old style, from the main section
6338
      for name in constants.BES_PARAMETERS:
6339
        if (name not in self.op.beparams and
6340
            einfo.has_option(constants.INISECT_INS, name)):
6341
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6342

    
6343
  def _RevertToDefaults(self, cluster):
6344
    """Revert the instance parameters to the default values.
6345

6346
    """
6347
    # hvparams
6348
    hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6349
    for name in self.op.hvparams.keys():
6350
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6351
        del self.op.hvparams[name]
6352
    # beparams
6353
    be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6354
    for name in self.op.beparams.keys():
6355
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6356
        del self.op.beparams[name]
6357
    # nic params
6358
    nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6359
    for nic in self.op.nics:
6360
      for name in constants.NICS_PARAMETERS:
6361
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6362
          del nic[name]
6363

    
6364
  def CheckPrereq(self):
6365
    """Check prerequisites.
6366

6367
    """
6368
    if self.op.mode == constants.INSTANCE_IMPORT:
6369
      export_info = self._ReadExportInfo()
6370
      self._ReadExportParams(export_info)
6371

    
6372
    _CheckDiskTemplate(self.op.disk_template)
6373

    
6374
    if (not self.cfg.GetVGName() and
6375
        self.op.disk_template not in constants.DTS_NOT_LVM):
6376
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6377
                                 " instances", errors.ECODE_STATE)
6378

    
6379
    if self.op.hypervisor is None:
6380
      self.op.hypervisor = self.cfg.GetHypervisorType()
6381

    
6382
    cluster = self.cfg.GetClusterInfo()
6383
    enabled_hvs = cluster.enabled_hypervisors
6384
    if self.op.hypervisor not in enabled_hvs:
6385
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6386
                                 " cluster (%s)" % (self.op.hypervisor,
6387
                                  ",".join(enabled_hvs)),
6388
                                 errors.ECODE_STATE)
6389

    
6390
    # check hypervisor parameter syntax (locally)
6391
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6392
    filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6393
                                                        self.op.os_type),
6394
                                  self.op.hvparams)
6395
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6396
    hv_type.CheckParameterSyntax(filled_hvp)
6397
    self.hv_full = filled_hvp
6398
    # check that we don't specify global parameters on an instance
6399
    _CheckGlobalHvParams(self.op.hvparams)
6400

    
6401
    # fill and remember the beparams dict
6402
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6403
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6404
                                    self.op.beparams)
6405

    
6406
    # now that hvp/bep are in final format, let's reset to defaults,
6407
    # if told to do so
6408
    if self.op.identify_defaults:
6409
      self._RevertToDefaults(cluster)
6410

    
6411
    # NIC buildup
6412
    self.nics = []
6413
    for idx, nic in enumerate(self.op.nics):
6414
      nic_mode_req = nic.get("mode", None)
6415
      nic_mode = nic_mode_req
6416
      if nic_mode is None:
6417
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6418

    
6419
      # in routed mode, for the first nic, the default ip is 'auto'
6420
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6421
        default_ip_mode = constants.VALUE_AUTO
6422
      else:
6423
        default_ip_mode = constants.VALUE_NONE
6424

    
6425
      # ip validity checks
6426
      ip = nic.get("ip", default_ip_mode)
6427
      if ip is None or ip.lower() == constants.VALUE_NONE:
6428
        nic_ip = None
6429
      elif ip.lower() == constants.VALUE_AUTO:
6430
        if not self.op.name_check:
6431
          raise errors.OpPrereqError("IP address set to auto but name checks"
6432
                                     " have been skipped. Aborting.",
6433
                                     errors.ECODE_INVAL)
6434
        nic_ip = self.hostname1.ip
6435
      else:
6436
        if not utils.IsValidIP(ip):
6437
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6438
                                     " like a valid IP" % ip,
6439
                                     errors.ECODE_INVAL)
6440
        nic_ip = ip
6441

    
6442
      # TODO: check the ip address for uniqueness
6443
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6444
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6445
                                   errors.ECODE_INVAL)
6446

    
6447
      # MAC address verification
6448
      mac = nic.get("mac", constants.VALUE_AUTO)
6449
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6450
        mac = utils.NormalizeAndValidateMac(mac)
6451

    
6452
        try:
6453
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6454
        except errors.ReservationError:
6455
          raise errors.OpPrereqError("MAC address %s already in use"
6456
                                     " in cluster" % mac,
6457
                                     errors.ECODE_NOTUNIQUE)
6458

    
6459
      # bridge verification
6460
      bridge = nic.get("bridge", None)
6461
      link = nic.get("link", None)
6462
      if bridge and link:
6463
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6464
                                   " at the same time", errors.ECODE_INVAL)
6465
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6466
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6467
                                   errors.ECODE_INVAL)
6468
      elif bridge:
6469
        link = bridge
6470

    
6471
      nicparams = {}
6472
      if nic_mode_req:
6473
        nicparams[constants.NIC_MODE] = nic_mode_req
6474
      if link:
6475
        nicparams[constants.NIC_LINK] = link
6476

    
6477
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6478
                                      nicparams)
6479
      objects.NIC.CheckParameterSyntax(check_params)
6480
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6481

    
6482
    # disk checks/pre-build
6483
    self.disks = []
6484
    for disk in self.op.disks:
6485
      mode = disk.get("mode", constants.DISK_RDWR)
6486
      if mode not in constants.DISK_ACCESS_SET:
6487
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6488
                                   mode, errors.ECODE_INVAL)
6489
      size = disk.get("size", None)
6490
      if size is None:
6491
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6492
      try:
6493
        size = int(size)
6494
      except (TypeError, ValueError):
6495
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6496
                                   errors.ECODE_INVAL)
6497
      new_disk = {"size": size, "mode": mode}
6498
      if "adopt" in disk:
6499
        new_disk["adopt"] = disk["adopt"]
6500
      self.disks.append(new_disk)
6501

    
6502
    if self.op.mode == constants.INSTANCE_IMPORT:
6503

    
6504
      # Check that the new instance doesn't have less disks than the export
6505
      instance_disks = len(self.disks)
6506
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6507
      if instance_disks < export_disks:
6508
        raise errors.OpPrereqError("Not enough disks to import."
6509
                                   " (instance: %d, export: %d)" %
6510
                                   (instance_disks, export_disks),
6511
                                   errors.ECODE_INVAL)
6512

    
6513
      disk_images = []
6514
      for idx in range(export_disks):
6515
        option = 'disk%d_dump' % idx
6516
        if export_info.has_option(constants.INISECT_INS, option):
6517
          # FIXME: are the old os-es, disk sizes, etc. useful?
6518
          export_name = export_info.get(constants.INISECT_INS, option)
6519
          image = utils.PathJoin(self.op.src_path, export_name)
6520
          disk_images.append(image)
6521
        else:
6522
          disk_images.append(False)
6523

    
6524
      self.src_images = disk_images
6525

    
6526
      old_name = export_info.get(constants.INISECT_INS, 'name')
6527
      try:
6528
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6529
      except (TypeError, ValueError), err:
6530
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6531
                                   " an integer: %s" % str(err),
6532
                                   errors.ECODE_STATE)
6533
      if self.op.instance_name == old_name:
6534
        for idx, nic in enumerate(self.nics):
6535
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6536
            nic_mac_ini = 'nic%d_mac' % idx
6537
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6538

    
6539
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6540

    
6541
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6542
    if self.op.ip_check:
6543
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6544
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6545
                                   (self.check_ip, self.op.instance_name),
6546
                                   errors.ECODE_NOTUNIQUE)
6547

    
6548
    #### mac address generation
6549
    # By generating here the mac address both the allocator and the hooks get
6550
    # the real final mac address rather than the 'auto' or 'generate' value.
6551
    # There is a race condition between the generation and the instance object
6552
    # creation, which means that we know the mac is valid now, but we're not
6553
    # sure it will be when we actually add the instance. If things go bad
6554
    # adding the instance will abort because of a duplicate mac, and the
6555
    # creation job will fail.
6556
    for nic in self.nics:
6557
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6558
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6559

    
6560
    #### allocator run
6561

    
6562
    if self.op.iallocator is not None:
6563
      self._RunAllocator()
6564

    
6565
    #### node related checks
6566

    
6567
    # check primary node
6568
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6569
    assert self.pnode is not None, \
6570
      "Cannot retrieve locked node %s" % self.op.pnode
6571
    if pnode.offline:
6572
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6573
                                 pnode.name, errors.ECODE_STATE)
6574
    if pnode.drained:
6575
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6576
                                 pnode.name, errors.ECODE_STATE)
6577

    
6578
    self.secondaries = []
6579

    
6580
    # mirror node verification
6581
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6582
      if self.op.snode is None:
6583
        raise errors.OpPrereqError("The networked disk templates need"
6584
                                   " a mirror node", errors.ECODE_INVAL)
6585
      if self.op.snode == pnode.name:
6586
        raise errors.OpPrereqError("The secondary node cannot be the"
6587
                                   " primary node.", errors.ECODE_INVAL)
6588
      _CheckNodeOnline(self, self.op.snode)
6589
      _CheckNodeNotDrained(self, self.op.snode)
6590
      self.secondaries.append(self.op.snode)
6591

    
6592
    nodenames = [pnode.name] + self.secondaries
6593

    
6594
    req_size = _ComputeDiskSize(self.op.disk_template,
6595
                                self.disks)
6596

    
6597
    # Check lv size requirements, if not adopting
6598
    if req_size is not None and not self.adopt_disks:
6599
      _CheckNodesFreeDisk(self, nodenames, req_size)
6600

    
6601
    if self.adopt_disks: # instead, we must check the adoption data
6602
      all_lvs = set([i["adopt"] for i in self.disks])
6603
      if len(all_lvs) != len(self.disks):
6604
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6605
                                   errors.ECODE_INVAL)
6606
      for lv_name in all_lvs:
6607
        try:
6608
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6609
        except errors.ReservationError:
6610
          raise errors.OpPrereqError("LV named %s used by another instance" %
6611
                                     lv_name, errors.ECODE_NOTUNIQUE)
6612

    
6613
      node_lvs = self.rpc.call_lv_list([pnode.name],
6614
                                       self.cfg.GetVGName())[pnode.name]
6615
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6616
      node_lvs = node_lvs.payload
6617
      delta = all_lvs.difference(node_lvs.keys())
6618
      if delta:
6619
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6620
                                   utils.CommaJoin(delta),
6621
                                   errors.ECODE_INVAL)
6622
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6623
      if online_lvs:
6624
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6625
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6626
                                   errors.ECODE_STATE)
6627
      # update the size of disk based on what is found
6628
      for dsk in self.disks:
6629
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6630

    
6631
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6632

    
6633
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6634

    
6635
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6636

    
6637
    # memory check on primary node
6638
    if self.op.start:
6639
      _CheckNodeFreeMemory(self, self.pnode.name,
6640
                           "creating instance %s" % self.op.instance_name,
6641
                           self.be_full[constants.BE_MEMORY],
6642
                           self.op.hypervisor)
6643

    
6644
    self.dry_run_result = list(nodenames)
6645

    
6646
  def Exec(self, feedback_fn):
6647
    """Create and add the instance to the cluster.
6648

6649
    """
6650
    instance = self.op.instance_name
6651
    pnode_name = self.pnode.name
6652

    
6653
    ht_kind = self.op.hypervisor
6654
    if ht_kind in constants.HTS_REQ_PORT:
6655
      network_port = self.cfg.AllocatePort()
6656
    else:
6657
      network_port = None
6658

    
6659
    if constants.ENABLE_FILE_STORAGE:
6660
      # this is needed because os.path.join does not accept None arguments
6661
      if self.op.file_storage_dir is None:
6662
        string_file_storage_dir = ""
6663
      else:
6664
        string_file_storage_dir = self.op.file_storage_dir
6665

    
6666
      # build the full file storage dir path
6667
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6668
                                        string_file_storage_dir, instance)
6669
    else:
6670
      file_storage_dir = ""
6671

    
6672

    
6673
    disks = _GenerateDiskTemplate(self,
6674
                                  self.op.disk_template,
6675
                                  instance, pnode_name,
6676
                                  self.secondaries,
6677
                                  self.disks,
6678
                                  file_storage_dir,
6679
                                  self.op.file_driver,
6680
                                  0)
6681

    
6682
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6683
                            primary_node=pnode_name,
6684
                            nics=self.nics, disks=disks,
6685
                            disk_template=self.op.disk_template,
6686
                            admin_up=False,
6687
                            network_port=network_port,
6688
                            beparams=self.op.beparams,
6689
                            hvparams=self.op.hvparams,
6690
                            hypervisor=self.op.hypervisor,
6691
                            )
6692

    
6693
    if self.adopt_disks:
6694
      # rename LVs to the newly-generated names; we need to construct
6695
      # 'fake' LV disks with the old data, plus the new unique_id
6696
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6697
      rename_to = []
6698
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6699
        rename_to.append(t_dsk.logical_id)
6700
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6701
        self.cfg.SetDiskID(t_dsk, pnode_name)
6702
      result = self.rpc.call_blockdev_rename(pnode_name,
6703
                                             zip(tmp_disks, rename_to))
6704
      result.Raise("Failed to rename adoped LVs")
6705
    else:
6706
      feedback_fn("* creating instance disks...")
6707
      try:
6708
        _CreateDisks(self, iobj)
6709
      except errors.OpExecError:
6710
        self.LogWarning("Device creation failed, reverting...")
6711
        try:
6712
          _RemoveDisks(self, iobj)
6713
        finally:
6714
          self.cfg.ReleaseDRBDMinors(instance)
6715
          raise
6716

    
6717
    feedback_fn("adding instance %s to cluster config" % instance)
6718

    
6719
    self.cfg.AddInstance(iobj, self.proc.GetECId())
6720

    
6721
    # Declare that we don't want to remove the instance lock anymore, as we've
6722
    # added the instance to the config
6723
    del self.remove_locks[locking.LEVEL_INSTANCE]
6724
    # Unlock all the nodes
6725
    if self.op.mode == constants.INSTANCE_IMPORT:
6726
      nodes_keep = [self.op.src_node]
6727
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6728
                       if node != self.op.src_node]
6729
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6730
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6731
    else:
6732
      self.context.glm.release(locking.LEVEL_NODE)
6733
      del self.acquired_locks[locking.LEVEL_NODE]
6734

    
6735
    if self.op.wait_for_sync:
6736
      disk_abort = not _WaitForSync(self, iobj)
6737
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
6738
      # make sure the disks are not degraded (still sync-ing is ok)
6739
      time.sleep(15)
6740
      feedback_fn("* checking mirrors status")
6741
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6742
    else:
6743
      disk_abort = False
6744

    
6745
    if disk_abort:
6746
      _RemoveDisks(self, iobj)
6747
      self.cfg.RemoveInstance(iobj.name)
6748
      # Make sure the instance lock gets removed
6749
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6750
      raise errors.OpExecError("There are some degraded disks for"
6751
                               " this instance")
6752

    
6753
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6754
      if self.op.mode == constants.INSTANCE_CREATE:
6755
        if not self.op.no_install:
6756
          feedback_fn("* running the instance OS create scripts...")
6757
          # FIXME: pass debug option from opcode to backend
6758
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6759
                                                 self.op.debug_level)
6760
          result.Raise("Could not add os for instance %s"
6761
                       " on node %s" % (instance, pnode_name))
6762

    
6763
      elif self.op.mode == constants.INSTANCE_IMPORT:
6764
        feedback_fn("* running the instance OS import scripts...")
6765
        src_node = self.op.src_node
6766
        src_images = self.src_images
6767
        cluster_name = self.cfg.GetClusterName()
6768
        # FIXME: pass debug option from opcode to backend
6769
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6770
                                                         src_node, src_images,
6771
                                                         cluster_name,
6772
                                                         self.op.debug_level)
6773
        msg = import_result.fail_msg
6774
        if msg:
6775
          self.LogWarning("Error while importing the disk images for instance"
6776
                          " %s on node %s: %s" % (instance, pnode_name, msg))
6777
      else:
6778
        # also checked in the prereq part
6779
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6780
                                     % self.op.mode)
6781

    
6782
    if self.op.start:
6783
      iobj.admin_up = True
6784
      self.cfg.Update(iobj, feedback_fn)
6785
      logging.info("Starting instance %s on node %s", instance, pnode_name)
6786
      feedback_fn("* starting instance...")
6787
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6788
      result.Raise("Could not start instance")
6789

    
6790
    return list(iobj.all_nodes)
6791

    
6792

    
6793
class LUConnectConsole(NoHooksLU):
6794
  """Connect to an instance's console.
6795

6796
  This is somewhat special in that it returns the command line that
6797
  you need to run on the master node in order to connect to the
6798
  console.
6799

6800
  """
6801
  _OP_REQP = ["instance_name"]
6802
  REQ_BGL = False
6803

    
6804
  def ExpandNames(self):
6805
    self._ExpandAndLockInstance()
6806

    
6807
  def CheckPrereq(self):
6808
    """Check prerequisites.
6809

6810
    This checks that the instance is in the cluster.
6811

6812
    """
6813
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6814
    assert self.instance is not None, \
6815
      "Cannot retrieve locked instance %s" % self.op.instance_name
6816
    _CheckNodeOnline(self, self.instance.primary_node)
6817

    
6818
  def Exec(self, feedback_fn):
6819
    """Connect to the console of an instance
6820

6821
    """
6822
    instance = self.instance
6823
    node = instance.primary_node
6824

    
6825
    node_insts = self.rpc.call_instance_list([node],
6826
                                             [instance.hypervisor])[node]
6827
    node_insts.Raise("Can't get node information from %s" % node)
6828

    
6829
    if instance.name not in node_insts.payload:
6830
      raise errors.OpExecError("Instance %s is not running." % instance.name)
6831

    
6832
    logging.debug("Connecting to console of %s on %s", instance.name, node)
6833

    
6834
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
6835
    cluster = self.cfg.GetClusterInfo()
6836
    # beparams and hvparams are passed separately, to avoid editing the
6837
    # instance and then saving the defaults in the instance itself.
6838
    hvparams = cluster.FillHV(instance)
6839
    beparams = cluster.FillBE(instance)
6840
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6841

    
6842
    # build ssh cmdline
6843
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6844
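  # Note (illustrative, not in the original source): the value returned above
  # is not executed here; it is the ssh command line (wrapping the
  # hypervisor's console command, e.g. "xm console <instance>" under Xen)
  # that the command-line client then runs on the master node.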

    
6845

    
6846
class LUReplaceDisks(LogicalUnit):
6847
  """Replace the disks of an instance.
6848

6849
  """
6850
  HPATH = "mirrors-replace"
6851
  HTYPE = constants.HTYPE_INSTANCE
6852
  _OP_REQP = ["instance_name", "mode", "disks"]
6853
  REQ_BGL = False
6854

    
6855
  def CheckArguments(self):
6856
    if not hasattr(self.op, "remote_node"):
6857
      self.op.remote_node = None
6858
    if not hasattr(self.op, "iallocator"):
6859
      self.op.iallocator = None
6860
    if not hasattr(self.op, "early_release"):
6861
      self.op.early_release = False
6862

    
6863
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6864
                                  self.op.iallocator)
6865

    
6866
  def ExpandNames(self):
6867
    self._ExpandAndLockInstance()
6868

    
6869
    if self.op.iallocator is not None:
6870
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6871

    
6872
    elif self.op.remote_node is not None:
6873
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6874
      self.op.remote_node = remote_node
6875

    
6876
      # Warning: do not remove the locking of the new secondary here
6877
      # unless DRBD8.AddChildren is changed to work in parallel;
6878
      # currently it doesn't since parallel invocations of
6879
      # FindUnusedMinor will conflict
6880
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6881
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6882

    
6883
    else:
6884
      self.needed_locks[locking.LEVEL_NODE] = []
6885
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6886

    
6887
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6888
                                   self.op.iallocator, self.op.remote_node,
6889
                                   self.op.disks, False, self.op.early_release)
6890

    
6891
    self.tasklets = [self.replacer]
6892

    
6893
  def DeclareLocks(self, level):
6894
    # If we're not already locking all nodes in the set we have to declare the
6895
    # instance's primary/secondary nodes.
6896
    if (level == locking.LEVEL_NODE and
6897
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6898
      self._LockInstancesNodes()
6899

    
6900
  def BuildHooksEnv(self):
6901
    """Build hooks env.
6902

6903
    This runs on the master, the primary and all the secondaries.
6904

6905
    """
6906
    instance = self.replacer.instance
6907
    env = {
6908
      "MODE": self.op.mode,
6909
      "NEW_SECONDARY": self.op.remote_node,
6910
      "OLD_SECONDARY": instance.secondary_nodes[0],
6911
      }
6912
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6913
    nl = [
6914
      self.cfg.GetMasterNode(),
6915
      instance.primary_node,
6916
      ]
6917
    if self.op.remote_node is not None:
6918
      nl.append(self.op.remote_node)
6919
    return env, nl, nl
6920

    
6921

    
6922
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True, self.op.early_release)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


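# LUEvacuateNode builds one TLReplaceDisks tasklet per secondary instance on
# the evacuated node and lets the LU machinery run them in sequence, keeping
# self.tasklets and self.instance_names aligned so instance locks match the
# work items. A minimal, standalone restatement of that aggregation pattern
# follows; the helper and its stand-in tuples are hypothetical and unused.
def _ExampleTaskletAggregation(instance_names):
  """Illustrative only: one work item per affected instance, names in sync."""
  tasklets = []
  names = []
  for name in instance_names:
    names.append(name)
    tasklets.append(("replace-disks", name))  # stand-in for TLReplaceDisks(...)
  return tasklets, names

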
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

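  # The CheckArguments helper above enforces that an iallocator and an explicit
  # new node are mutually exclusive selectors, and that either one is only
  # meaningful when changing the secondary. The illustrative staticmethod below
  # restates those rules as a boolean; it is not used anywhere in this module.
  @staticmethod
  def _ExampleSecondaryChangeArgsValid(mode, remote_node, iallocator):
    """Illustrative only: boolean form of the CheckArguments rules."""
    if mode == constants.REPLACE_DISK_CHG:
      # exactly one of the two selectors must be given
      return (remote_node is None) != (iallocator is None)
    # for the other modes neither selector may be given
    return remote_node is None and iallocator is None
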
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation the iallocator would
    otherwise only see an unmodified cluster model, not taking planned changes
    into account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

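  # _CheckPrereq2 derives three node roles from the replacement mode:
  # target_node (where storage is recreated), other_node (the peer checked for
  # consistency) and new_node (only set when changing the secondary). The
  # mapping for the non-automatic modes is summarised below; REPLACE_DISK_AUTO
  # picks roles from whichever node has faulty disks and is omitted. The
  # staticmethod is illustrative only and is not used by this module.
  @staticmethod
  def _ExampleReplaceDiskRoles(mode, primary, secondary, new=None):
    """Illustrative only: node roles as computed by _CheckPrereq2."""
    if mode == constants.REPLACE_DISK_PRI:
      return {"target": primary, "other": secondary, "new": None}
    if mode == constants.REPLACE_DISK_SEC:
      return {"target": secondary, "other": primary, "new": None}
    if mode == constants.REPLACE_DISK_CHG:
      return {"target": secondary, "other": primary, "new": new}
    raise errors.ProgrammerError("Unhandled disk replace mode (%s)" % mode)
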
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

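  # _CreateNewStorage returns the iv_names mapping that the later steps
  # (_CheckDevices, _RemoveOldStorage and both _ExecDrbd8* methods) iterate
  # over. Its shape is sketched below with placeholder strings standing in for
  # the objects.Disk instances; the staticmethod is illustrative only.
  @staticmethod
  def _ExampleIvNamesMapping():
    """Illustrative only: shape of the dict built by _CreateNewStorage.

    Keys are the DRBD devices' iv_name strings (e.g. "disk/0"); values are
    (drbd_device, old_child_lvs, new_child_lvs) tuples.

    """
    return {
      "disk/0": ("<drbd disk>",
                 ["<old data LV>", "<old meta LV>"],
                 ["<new data LV>", "<new meta LV>"]),
      }
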
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

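  # The detach/rename/attach sequence above swaps LV names so that the new LVs
  # take over the old names while the old LVs are parked under a timestamped
  # suffix. The pairing logic is restated below on plain (vg_name, lv_name)
  # tuples instead of objects.Disk physical_ids; the staticmethod is
  # illustrative only and is not called anywhere.
  @staticmethod
  def _ExampleBuildRenamePairs(old_ids, new_ids, temp_suffix):
    """Illustrative only: rename pairing used when swapping LVs."""
    rename_old_to_new = [(old, (old[0], old[1] + "_replaced-%s" % temp_suffix))
                         for old in old_ids]
    rename_new_to_old = [(new, old) for old, new in zip(old_ids, new_ids)]
    return rename_old_to_new, rename_new_to_old
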
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


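# When the secondary changes, _ExecDrbd8Secondary rewrites each disk's DRBD
# logical_id, a 6-tuple of (node_a, node_b, port, minor_a, minor_b, secret):
# first to a "standalone" variant without the port, then to the networked
# variant used for the final attach. The sketch below mirrors that derivation
# on plain tuples; the helper is hypothetical and unused by this module.
def _ExampleNewDrbdLogicalIds(old_logical_id, primary_node, new_node,
                              new_minor):
  """Illustrative only: derive the new DRBD logical_ids for a new secondary."""
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_logical_id
  # keep the primary's minor, replace the old secondary's with the new one
  if primary_node == o_node1:
    p_minor = o_minor1
  else:
    p_minor = o_minor2
  alone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
  net_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
  return alone_id, net_id

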
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


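# LUNodeEvacuationStrategy.Exec returns a list of [instance_name, new_node]
# pairs, either pinned to the given remote node or computed by the iallocator
# in MEVAC mode. A hypothetical example of the returned value (names invented):
def _ExampleEvacuationStrategyResult():
  """Illustrative only: shape of LUNodeEvacuationStrategy's result."""
  return [
    ["instance1.example.com", "node4.example.com"],
    ["instance2.example.com", "node5.example.com"],
    ]

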
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


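# LUQueryInstanceData.Exec returns a dict keyed by instance name; each value
# is the idict assembled above. A trimmed, hypothetical example of one entry
# (most keys omitted, names invented):
def _ExampleInstanceDataEntry():
  """Illustrative only: trimmed example of one LUQueryInstanceData entry."""
  return {
    "instance1.example.com": {
      "name": "instance1.example.com",
      "config_state": "up",   # from instance.admin_up
      "run_state": "up",      # None when static queries were requested
      "pnode": "node1.example.com",
      "snodes": ["node2.example.com"],
      "disks": [],            # per-disk status, see _ComputeDiskStatus
      },
    }

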
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    if not hasattr(self.op, "disk_template"):
      self.op.disk_template = None
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "os_name"):
      self.op.os_name = None
    if not hasattr(self.op, "force_variant"):
      self.op.force_variant = False
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
8359
                         (pnode,  msg))
8360
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8361
        self.warn.append("Node data from primary node %s doesn't contain"
8362
                         " free memory information" % pnode)
8363
      elif instance_info.fail_msg:
8364
        self.warn.append("Can't get instance runtime information: %s" %
8365
                        instance_info.fail_msg)
8366
      else:
8367
        if instance_info.payload:
8368
          current_mem = int(instance_info.payload['memory'])
8369
        else:
8370
          # Assume instance not running
8371
          # (there is a slight race condition here, but it's not very probable,
8372
          # and we have no other way to check)
8373
          current_mem = 0
8374
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8375
                    pninfo.payload['memory_free'])
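        # Worked example (illustrative numbers): requesting 2048 MB while the
        # instance currently uses 512 MB and the node reports 1024 MB free
        # gives miss_mem = 2048 - 512 - 1024 = 512, so the check below fails.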
        if miss_mem > 0:
8377
          raise errors.OpPrereqError("This change will prevent the instance"
8378
                                     " from starting, due to %d MB of memory"
8379
                                     " missing on its primary node" % miss_mem,
8380
                                     errors.ECODE_NORES)
8381

    
8382
      if be_new[constants.BE_AUTO_BALANCE]:
8383
        for node, nres in nodeinfo.items():
8384
          if node not in instance.secondary_nodes:
8385
            continue
8386
          msg = nres.fail_msg
8387
          if msg:
8388
            self.warn.append("Can't get info from secondary node %s: %s" %
8389
                             (node, msg))
8390
          elif not isinstance(nres.payload.get('memory_free', None), int):
8391
            self.warn.append("Secondary node %s didn't return free"
8392
                             " memory information" % node)
8393
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8394
            self.warn.append("Not enough memory to failover instance to"
8395
                             " secondary node %s" % node)
8396

    
8397
    # NIC processing
8398
    self.nic_pnew = {}
8399
    self.nic_pinst = {}
8400
    for nic_op, nic_dict in self.op.nics:
8401
      if nic_op == constants.DDM_REMOVE:
8402
        if not instance.nics:
8403
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8404
                                     errors.ECODE_INVAL)
8405
        continue
8406
      if nic_op != constants.DDM_ADD:
8407
        # an existing nic
8408
        if not instance.nics:
8409
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8410
                                     " no NICs" % nic_op,
8411
                                     errors.ECODE_INVAL)
8412
        if nic_op < 0 or nic_op >= len(instance.nics):
8413
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8414
                                     " are 0 to %d" %
8415
                                     (nic_op, len(instance.nics) - 1),
8416
                                     errors.ECODE_INVAL)
8417
        old_nic_params = instance.nics[nic_op].nicparams
8418
        old_nic_ip = instance.nics[nic_op].ip
8419
      else:
8420
        old_nic_params = {}
8421
        old_nic_ip = None
8422

    
8423
      update_params_dict = dict([(key, nic_dict[key])
8424
                                 for key in constants.NICS_PARAMETERS
8425
                                 if key in nic_dict])
8426

    
8427
      if 'bridge' in nic_dict:
8428
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8429

    
8430
      new_nic_params, new_filled_nic_params = \
8431
          self._GetUpdatedParams(old_nic_params, update_params_dict,
8432
                                 cluster.nicparams[constants.PP_DEFAULT],
8433
                                 constants.NICS_PARAMETER_TYPES)
8434
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8435
      self.nic_pinst[nic_op] = new_nic_params
8436
      self.nic_pnew[nic_op] = new_filled_nic_params
8437
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8438

    
8439
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8440
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8441
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8442
        if msg:
8443
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8444
          if self.force:
8445
            self.warn.append(msg)
8446
          else:
8447
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8448
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8449
        if 'ip' in nic_dict:
8450
          nic_ip = nic_dict['ip']
8451
        else:
8452
          nic_ip = old_nic_ip
8453
        if nic_ip is None:
8454
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8455
                                     ' on a routed nic', errors.ECODE_INVAL)
8456
      if 'mac' in nic_dict:
8457
        nic_mac = nic_dict['mac']
8458
        if nic_mac is None:
8459
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8460
                                     errors.ECODE_INVAL)
8461
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8462
          # otherwise generate the mac
8463
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8464
        else:
8465
          # or validate/reserve the current one
8466
          try:
8467
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8468
          except errors.ReservationError:
8469
            raise errors.OpPrereqError("MAC address %s already in use"
8470
                                       " in cluster" % nic_mac,
8471
                                       errors.ECODE_NOTUNIQUE)
8472

    
8473
    # DISK processing
8474
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8475
      raise errors.OpPrereqError("Disk operations not supported for"
8476
                                 " diskless instances",
8477
                                 errors.ECODE_INVAL)
8478
    for disk_op, _ in self.op.disks:
8479
      if disk_op == constants.DDM_REMOVE:
8480
        if len(instance.disks) == 1:
8481
          raise errors.OpPrereqError("Cannot remove the last disk of"
8482
                                     " an instance", errors.ECODE_INVAL)
8483
        _CheckInstanceDown(self, instance, "cannot remove disks")
8484

    
8485
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
8487
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8488
                                   " add more" % constants.MAX_DISKS,
8489
                                   errors.ECODE_STATE)
8490
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8491
        # an existing disk
8492
        if disk_op < 0 or disk_op >= len(instance.disks):
8493
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8494
                                     " are 0 to %d" %
8495
                                     (disk_op, len(instance.disks)),
8496
                                     errors.ECODE_INVAL)
8497

    
8498
    # OS change
8499
    if self.op.os_name and not self.op.force:
8500
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8501
                      self.op.force_variant)
8502

    
8503
    return
8504

    
8505
  def _ConvertPlainToDrbd(self, feedback_fn):
8506
    """Converts an instance from plain to drbd.
8507

8508
    """
8509
    feedback_fn("Converting template to drbd")
8510
    instance = self.instance
8511
    pnode = instance.primary_node
8512
    snode = self.op.remote_node
8513

    
8514
    # create a fake disk info for _GenerateDiskTemplate
8515
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8516
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8517
                                      instance.name, pnode, [snode],
8518
                                      disk_info, None, None, 0)
8519
    info = _GetInstanceInfoText(instance)
8520
    feedback_fn("Creating aditional volumes...")
8521
    # first, create the missing data and meta devices
8522
    for disk in new_disks:
8523
      # unfortunately this is... not too nice
8524
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8525
                            info, True)
8526
      for child in disk.children:
8527
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8528
    # at this stage, all new LVs have been created, we can rename the
8529
    # old ones
8530
    feedback_fn("Renaming original volumes...")
8531
    rename_list = [(o, n.children[0].logical_id)
8532
                   for (o, n) in zip(instance.disks, new_disks)]
8533
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8534
    result.Raise("Failed to rename original LVs")
8535

    
8536
    feedback_fn("Initializing DRBD devices...")
8537
    # all child devices are in place, we can now create the DRBD devices
8538
    for disk in new_disks:
8539
      for node in [pnode, snode]:
8540
        f_create = node == pnode
8541
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8542

    
8543
    # at this point, the instance has been modified
8544
    instance.disk_template = constants.DT_DRBD8
8545
    instance.disks = new_disks
8546
    self.cfg.Update(instance, feedback_fn)
8547

    
8548
    # disks are created, waiting for sync
8549
    disk_abort = not _WaitForSync(self, instance)
8550
    if disk_abort:
8551
      raise errors.OpExecError("There are some degraded disks for"
8552
                               " this instance, please cleanup manually")
8553

    
8554
  def _ConvertDrbdToPlain(self, feedback_fn):
8555
    """Converts an instance from drbd to plain.
8556

8557
    """
8558
    instance = self.instance
8559
    assert len(instance.secondary_nodes) == 1
8560
    pnode = instance.primary_node
8561
    snode = instance.secondary_nodes[0]
8562
    feedback_fn("Converting template to plain")
8563

    
8564
    old_disks = instance.disks
8565
    new_disks = [d.children[0] for d in old_disks]
8566

    
8567
    # copy over size and mode
8568
    for parent, child in zip(old_disks, new_disks):
8569
      child.size = parent.size
8570
      child.mode = parent.mode
8571

    
8572
    # update instance structure
8573
    instance.disks = new_disks
8574
    instance.disk_template = constants.DT_PLAIN
8575
    self.cfg.Update(instance, feedback_fn)
8576

    
8577
    feedback_fn("Removing volumes on the secondary node...")
8578
    for disk in old_disks:
8579
      self.cfg.SetDiskID(disk, snode)
8580
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8581
      if msg:
8582
        self.LogWarning("Could not remove block device %s on node %s,"
8583
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8584

    
8585
    feedback_fn("Removing unneeded volumes on the primary node...")
8586
    for idx, disk in enumerate(old_disks):
8587
      meta = disk.children[1]
8588
      self.cfg.SetDiskID(meta, pnode)
8589
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8590
      if msg:
8591
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8592
                        " continuing anyway: %s", idx, pnode, msg)
8593

    
8594

    
8595
  def Exec(self, feedback_fn):
8596
    """Modifies an instance.
8597

8598
    All parameters take effect only at the next restart of the instance.
8599

8600
    """
8601
    # Process here the warnings from CheckPrereq, as we don't have a
8602
    # feedback_fn there.
8603
    for warn in self.warn:
8604
      feedback_fn("WARNING: %s" % warn)
8605

    
8606
    result = []
8607
    instance = self.instance
8608
    # disk changes
8609
    for disk_op, disk_dict in self.op.disks:
8610
      if disk_op == constants.DDM_REMOVE:
8611
        # remove the last disk
8612
        device = instance.disks.pop()
8613
        device_idx = len(instance.disks)
8614
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8615
          self.cfg.SetDiskID(disk, node)
8616
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8617
          if msg:
8618
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8619
                            " continuing anyway", device_idx, node, msg)
8620
        result.append(("disk/%d" % device_idx, "remove"))
8621
      elif disk_op == constants.DDM_ADD:
8622
        # add a new disk
8623
        if instance.disk_template == constants.DT_FILE:
8624
          file_driver, file_path = instance.disks[0].logical_id
8625
          file_path = os.path.dirname(file_path)
8626
        else:
8627
          file_driver = file_path = None
8628
        disk_idx_base = len(instance.disks)
8629
        new_disk = _GenerateDiskTemplate(self,
8630
                                         instance.disk_template,
8631
                                         instance.name, instance.primary_node,
8632
                                         instance.secondary_nodes,
8633
                                         [disk_dict],
8634
                                         file_path,
8635
                                         file_driver,
8636
                                         disk_idx_base)[0]
8637
        instance.disks.append(new_disk)
8638
        info = _GetInstanceInfoText(instance)
8639

    
8640
        logging.info("Creating volume %s for instance %s",
8641
                     new_disk.iv_name, instance.name)
8642
        # Note: this needs to be kept in sync with _CreateDisks
8643
        #HARDCODE
8644
        for node in instance.all_nodes:
8645
          f_create = node == instance.primary_node
8646
          try:
8647
            _CreateBlockDev(self, node, instance, new_disk,
8648
                            f_create, info, f_create)
8649
          except errors.OpExecError, err:
8650
            self.LogWarning("Failed to create volume %s (%s) on"
8651
                            " node %s: %s",
8652
                            new_disk.iv_name, new_disk, node, err)
8653
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8654
                       (new_disk.size, new_disk.mode)))
8655
      else:
8656
        # change a given disk
8657
        instance.disks[disk_op].mode = disk_dict['mode']
8658
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8659

    
8660
    if self.op.disk_template:
8661
      r_shut = _ShutdownInstanceDisks(self, instance)
8662
      if not r_shut:
8663
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8664
                                 " proceed with disk template conversion")
8665
      mode = (instance.disk_template, self.op.disk_template)
8666
      try:
8667
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8668
      except:
8669
        self.cfg.ReleaseDRBDMinors(instance.name)
8670
        raise
8671
      result.append(("disk_template", self.op.disk_template))
8672

    
8673
    # NIC changes
8674
    for nic_op, nic_dict in self.op.nics:
8675
      if nic_op == constants.DDM_REMOVE:
8676
        # remove the last nic
8677
        del instance.nics[-1]
8678
        result.append(("nic.%d" % len(instance.nics), "remove"))
8679
      elif nic_op == constants.DDM_ADD:
8680
        # mac and bridge should be set, by now
8681
        mac = nic_dict['mac']
8682
        ip = nic_dict.get('ip', None)
8683
        nicparams = self.nic_pinst[constants.DDM_ADD]
8684
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8685
        instance.nics.append(new_nic)
8686
        result.append(("nic.%d" % (len(instance.nics) - 1),
8687
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8688
                       (new_nic.mac, new_nic.ip,
8689
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8690
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8691
                       )))
8692
      else:
8693
        for key in 'mac', 'ip':
8694
          if key in nic_dict:
8695
            setattr(instance.nics[nic_op], key, nic_dict[key])
8696
        if nic_op in self.nic_pinst:
8697
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8698
        for key, val in nic_dict.iteritems():
8699
          result.append(("nic.%s/%d" % (key, nic_op), val))
8700

    
8701
    # hvparams changes
8702
    if self.op.hvparams:
8703
      instance.hvparams = self.hv_inst
8704
      for key, val in self.op.hvparams.iteritems():
8705
        result.append(("hv/%s" % key, val))
8706

    
8707
    # beparams changes
8708
    if self.op.beparams:
8709
      instance.beparams = self.be_inst
8710
      for key, val in self.op.beparams.iteritems():
8711
        result.append(("be/%s" % key, val))
8712

    
8713
    # OS change
8714
    if self.op.os_name:
8715
      instance.os = self.op.os_name
8716

    
8717
    self.cfg.Update(instance, feedback_fn)
8718

    
8719
    return result
8720

    
8721
  _DISK_CONVERSIONS = {
8722
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8723
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8724
    }
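
  # Illustrative note: Exec dispatches disk template conversions through the
  # map above, e.g. self._DISK_CONVERSIONS[(constants.DT_PLAIN,
  # constants.DT_DRBD8)] resolves to _ConvertPlainToDrbd.

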
class LUQueryExports(NoHooksLU):
8727
  """Query the exports list
8728

8729
  """
8730
  _OP_REQP = ['nodes']
8731
  REQ_BGL = False
8732

    
8733
  def ExpandNames(self):
8734
    self.needed_locks = {}
8735
    self.share_locks[locking.LEVEL_NODE] = 1
8736
    if not self.op.nodes:
8737
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8738
    else:
8739
      self.needed_locks[locking.LEVEL_NODE] = \
8740
        _GetWantedNodes(self, self.op.nodes)
8741

    
8742
  def CheckPrereq(self):
8743
    """Check prerequisites.
8744

8745
    """
8746
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8747

    
8748
  def Exec(self, feedback_fn):
8749
    """Compute the list of all the exported system images.
8750

8751
    @rtype: dict
8752
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node, or False if querying that node failed
8755

8756
    """
8757
    rpcresult = self.rpc.call_export_list(self.nodes)
8758
    result = {}
8759
    for node in rpcresult:
8760
      if rpcresult[node].fail_msg:
8761
        result[node] = False
8762
      else:
8763
        result[node] = rpcresult[node].payload
8764

    
8765
    return result
8766

    
8767

    
8768
class LUExportInstance(LogicalUnit):
8769
  """Export an instance to an image in the cluster.
8770

8771
  """
8772
  HPATH = "instance-export"
8773
  HTYPE = constants.HTYPE_INSTANCE
8774
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8775
  REQ_BGL = False
8776

    
8777
  def CheckArguments(self):
8778
    """Check the arguments.
8779

8780
    """
8781
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8782
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
8783

    
8784
  def ExpandNames(self):
8785
    self._ExpandAndLockInstance()
8786
    # FIXME: lock only instance primary and destination node
8787
    #
8788
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
8790
    # remove it from its current node. In the future we could fix this by:
8791
    #  - making a tasklet to search (share-lock all), then create the new one,
8792
    #    then one to remove, after
8793
    #  - removing the removal operation altogether
8794
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8795

    
8796
  def DeclareLocks(self, level):
8797
    """Last minute lock declaration."""
8798
    # All nodes are locked anyway, so nothing to do here.
8799

    
8800
  def BuildHooksEnv(self):
8801
    """Build hooks env.
8802

8803
    This will run on the master, primary node and target node.
8804

8805
    """
8806
    env = {
8807
      "EXPORT_NODE": self.op.target_node,
8808
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8809
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8810
      }
8811
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8812
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8813
          self.op.target_node]
8814
    return env, nl, nl
8815

    
8816
  def CheckPrereq(self):
8817
    """Check prerequisites.
8818

8819
    This checks that the instance and node names are valid.
8820

8821
    """
8822
    instance_name = self.op.instance_name
8823
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8824
    assert self.instance is not None, \
8825
          "Cannot retrieve locked instance %s" % self.op.instance_name
8826
    _CheckNodeOnline(self, self.instance.primary_node)
8827

    
8828
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8829
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8830
    assert self.dst_node is not None
8831

    
8832
    _CheckNodeOnline(self, self.dst_node.name)
8833
    _CheckNodeNotDrained(self, self.dst_node.name)
8834

    
8835
    # instance disk type verification
8836
    for disk in self.instance.disks:
8837
      if disk.dev_type == constants.LD_FILE:
8838
        raise errors.OpPrereqError("Export not supported for instances with"
8839
                                   " file-based disks", errors.ECODE_INVAL)
8840

    
8841
  def Exec(self, feedback_fn):
8842
    """Export an instance to an image in the cluster.
8843

8844
    """
8845
    instance = self.instance
8846
    dst_node = self.dst_node
8847
    src_node = instance.primary_node
8848

    
8849
    if self.op.shutdown:
8850
      # shutdown the instance, but not the disks
8851
      feedback_fn("Shutting down instance %s" % instance.name)
8852
      result = self.rpc.call_instance_shutdown(src_node, instance,
8853
                                               self.shutdown_timeout)
8854
      result.Raise("Could not shutdown instance %s on"
8855
                   " node %s" % (instance.name, src_node))
8856

    
8857
    vgname = self.cfg.GetVGName()
8858

    
8859
    snap_disks = []
8860

    
8861
    # set the disks ID correctly since call_instance_start needs the
8862
    # correct drbd minor to create the symlinks
8863
    for disk in instance.disks:
8864
      self.cfg.SetDiskID(disk, src_node)
8865

    
8866
    activate_disks = (not instance.admin_up)
8867

    
8868
    if activate_disks:
8869
      # Activate the instance disks if we're exporting a stopped instance
8870
      feedback_fn("Activating disks for %s" % instance.name)
8871
      _StartInstanceDisks(self, instance, None)
8872

    
8873
    try:
8874
      # per-disk results
8875
      dresults = []
8876
      try:
8877
        for idx, disk in enumerate(instance.disks):
8878
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
8879
                      (idx, src_node))
8880

    
8881
          # result.payload will be a snapshot of an lvm leaf of the one we
8882
          # passed
8883
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
8884
          msg = result.fail_msg
8885
          if msg:
8886
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8887
                            idx, src_node, msg)
8888
            snap_disks.append(False)
8889
          else:
8890
            disk_id = (vgname, result.payload)
8891
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8892
                                   logical_id=disk_id, physical_id=disk_id,
8893
                                   iv_name=disk.iv_name)
8894
            snap_disks.append(new_dev)
8895

    
8896
      finally:
8897
        if self.op.shutdown and instance.admin_up:
8898
          feedback_fn("Starting instance %s" % instance.name)
8899
          result = self.rpc.call_instance_start(src_node, instance, None, None)
8900
          msg = result.fail_msg
8901
          if msg:
8902
            _ShutdownInstanceDisks(self, instance)
8903
            raise errors.OpExecError("Could not start instance: %s" % msg)
8904

    
8905
      # TODO: check for size
8906

    
8907
      cluster_name = self.cfg.GetClusterName()
8908
      for idx, dev in enumerate(snap_disks):
8909
        feedback_fn("Exporting snapshot %s from %s to %s" %
8910
                    (idx, src_node, dst_node.name))
8911
        if dev:
8912
          # FIXME: pass debug from opcode to backend
8913
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8914
                                                 instance, cluster_name,
8915
                                                 idx, self.op.debug_level)
8916
          msg = result.fail_msg
8917
          if msg:
8918
            self.LogWarning("Could not export disk/%s from node %s to"
8919
                            " node %s: %s", idx, src_node, dst_node.name, msg)
8920
            dresults.append(False)
8921
          else:
8922
            dresults.append(True)
8923
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8924
          if msg:
8925
            self.LogWarning("Could not remove snapshot for disk/%d from node"
8926
                            " %s: %s", idx, src_node, msg)
8927
        else:
8928
          dresults.append(False)
8929

    
8930
      feedback_fn("Finalizing export on %s" % dst_node.name)
8931
      result = self.rpc.call_finalize_export(dst_node.name, instance,
8932
                                             snap_disks)
8933
      fin_resu = True
8934
      msg = result.fail_msg
8935
      if msg:
8936
        self.LogWarning("Could not finalize export for instance %s"
8937
                        " on node %s: %s", instance.name, dst_node.name, msg)
8938
        fin_resu = False
8939

    
8940
    finally:
8941
      if activate_disks:
8942
        feedback_fn("Deactivating disks for %s" % instance.name)
8943
        _ShutdownInstanceDisks(self, instance)
8944

    
8945
    nodelist = self.cfg.GetNodeList()
8946
    nodelist.remove(dst_node.name)
8947

    
8948
    # on one-node clusters nodelist will be empty after the removal
8949
    # if we proceed the backup would be removed because OpQueryExports
8950
    # substitutes an empty list with the full cluster node list.
8951
    iname = instance.name
8952
    if nodelist:
8953
      feedback_fn("Removing old exports for instance %s" % iname)
8954
      exportlist = self.rpc.call_export_list(nodelist)
8955
      for node in exportlist:
8956
        if exportlist[node].fail_msg:
8957
          continue
8958
        if iname in exportlist[node].payload:
8959
          msg = self.rpc.call_export_remove(node, iname).fail_msg
8960
          if msg:
8961
            self.LogWarning("Could not remove older export for instance %s"
8962
                            " on node %s: %s", iname, node, msg)
8963
    return fin_resu, dresults
8964

    
8965

    
8966
class LURemoveExport(NoHooksLU):
8967
  """Remove exports related to the named instance.
8968

8969
  """
8970
  _OP_REQP = ["instance_name"]
8971
  REQ_BGL = False
8972

    
8973
  def ExpandNames(self):
8974
    self.needed_locks = {}
8975
    # We need all nodes to be locked in order for RemoveExport to work, but we
8976
    # don't need to lock the instance itself, as nothing will happen to it (and
8977
    # we can remove exports also for a removed instance)
8978
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8979

    
8980
  def CheckPrereq(self):
8981
    """Check prerequisites.
8982
    """
8983
    pass
8984

    
8985
  def Exec(self, feedback_fn):
8986
    """Remove any export.
8987

8988
    """
8989
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8990
    # If the instance was not found we'll try with the name that was passed in.
8991
    # This will only work if it was an FQDN, though.
8992
    fqdn_warn = False
8993
    if not instance_name:
8994
      fqdn_warn = True
8995
      instance_name = self.op.instance_name
8996

    
8997
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8998
    exportlist = self.rpc.call_export_list(locked_nodes)
8999
    found = False
9000
    for node in exportlist:
9001
      msg = exportlist[node].fail_msg
9002
      if msg:
9003
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9004
        continue
9005
      if instance_name in exportlist[node].payload:
9006
        found = True
9007
        result = self.rpc.call_export_remove(node, instance_name)
9008
        msg = result.fail_msg
9009
        if msg:
9010
          logging.error("Could not remove export for instance %s"
9011
                        " on node %s: %s", instance_name, node, msg)
9012

    
9013
    if fqdn_warn and not found:
9014
      feedback_fn("Export not found. If trying to remove an export belonging"
9015
                  " to a deleted instance please use its Fully Qualified"
9016
                  " Domain Name.")
9017

    
9018

    
9019
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9020
  """Generic tags LU.
9021

9022
  This is an abstract class which is the parent of all the other tags LUs.
9023

9024
  """
9025

    
9026
  def ExpandNames(self):
9027
    self.needed_locks = {}
9028
    if self.op.kind == constants.TAG_NODE:
9029
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9030
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9031
    elif self.op.kind == constants.TAG_INSTANCE:
9032
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9033
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9034

    
9035
  def CheckPrereq(self):
9036
    """Check prerequisites.
9037

9038
    """
9039
    if self.op.kind == constants.TAG_CLUSTER:
9040
      self.target = self.cfg.GetClusterInfo()
9041
    elif self.op.kind == constants.TAG_NODE:
9042
      self.target = self.cfg.GetNodeInfo(self.op.name)
9043
    elif self.op.kind == constants.TAG_INSTANCE:
9044
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9045
    else:
9046
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9047
                                 str(self.op.kind), errors.ECODE_INVAL)
9048

    
9049

    
9050
class LUGetTags(TagsLU):
9051
  """Returns the tags of a given object.
9052

9053
  """
9054
  _OP_REQP = ["kind", "name"]
9055
  REQ_BGL = False
9056

    
9057
  def Exec(self, feedback_fn):
9058
    """Returns the tag list.
9059

9060
    """
9061
    return list(self.target.GetTags())
9062

    
9063

    
9064
class LUSearchTags(NoHooksLU):
9065
  """Searches the tags for a given pattern.
9066

9067
  """
9068
  _OP_REQP = ["pattern"]
9069
  REQ_BGL = False
9070

    
9071
  def ExpandNames(self):
9072
    self.needed_locks = {}
9073

    
9074
  def CheckPrereq(self):
9075
    """Check prerequisites.
9076

9077
    This checks the pattern passed for validity by compiling it.
9078

9079
    """
9080
    try:
9081
      self.re = re.compile(self.op.pattern)
9082
    except re.error, err:
9083
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9084
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9085

    
9086
  def Exec(self, feedback_fn):
9087
    """Returns the tag list.
9088

9089
    """
9090
    cfg = self.cfg
9091
    tgts = [("/cluster", cfg.GetClusterInfo())]
9092
    ilist = cfg.GetAllInstancesInfo().values()
9093
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9094
    nlist = cfg.GetAllNodesInfo().values()
9095
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9096
    results = []
9097
    for path, target in tgts:
9098
      for tag in target.GetTags():
9099
        if self.re.search(tag):
9100
          results.append((path, tag))
9101
    return results
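
  # Illustrative result format (hypothetical names and tags):
  #   [("/cluster", "production"), ("/instances/web1.example.com", "www")]

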
class LUAddTags(TagsLU):
9105
  """Sets a tag on a given object.
9106

9107
  """
9108
  _OP_REQP = ["kind", "name", "tags"]
9109
  REQ_BGL = False
9110

    
9111
  def CheckPrereq(self):
9112
    """Check prerequisites.
9113

9114
    This checks the type and length of the tag name and value.
9115

9116
    """
9117
    TagsLU.CheckPrereq(self)
9118
    for tag in self.op.tags:
9119
      objects.TaggableObject.ValidateTag(tag)
9120

    
9121
  def Exec(self, feedback_fn):
9122
    """Sets the tag.
9123

9124
    """
9125
    try:
9126
      for tag in self.op.tags:
9127
        self.target.AddTag(tag)
9128
    except errors.TagError, err:
9129
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9130
    self.cfg.Update(self.target, feedback_fn)
9131

    
9132

    
9133
class LUDelTags(TagsLU):
9134
  """Delete a list of tags from a given object.
9135

9136
  """
9137
  _OP_REQP = ["kind", "name", "tags"]
9138
  REQ_BGL = False
9139

    
9140
  def CheckPrereq(self):
9141
    """Check prerequisites.
9142

9143
    This checks that we have the given tag.
9144

9145
    """
9146
    TagsLU.CheckPrereq(self)
9147
    for tag in self.op.tags:
9148
      objects.TaggableObject.ValidateTag(tag)
9149
    del_tags = frozenset(self.op.tags)
9150
    cur_tags = self.target.GetTags()
9151
    if not del_tags <= cur_tags:
9152
      diff_tags = del_tags - cur_tags
9153
      diff_names = ["'%s'" % tag for tag in diff_tags]
9154
      diff_names.sort()
9155
      raise errors.OpPrereqError("Tag(s) %s not found" %
9156
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9157

    
9158
  def Exec(self, feedback_fn):
9159
    """Remove the tag from the object.
9160

9161
    """
9162
    for tag in self.op.tags:
9163
      self.target.RemoveTag(tag)
9164
    self.cfg.Update(self.target, feedback_fn)
9165

    
9166

    
9167
class LUTestDelay(NoHooksLU):
9168
  """Sleep for a specified amount of time.
9169

9170
  This LU sleeps on the master and/or nodes for a specified amount of
9171
  time.
9172

9173
  """
9174
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9175
  REQ_BGL = False
9176

    
9177
  def ExpandNames(self):
9178
    """Expand names and set required locks.
9179

9180
    This expands the node list, if any.
9181

9182
    """
9183
    self.needed_locks = {}
9184
    if self.op.on_nodes:
9185
      # _GetWantedNodes can be used here, but is not always appropriate to use
9186
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9187
      # more information.
9188
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9189
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9190

    
9191
  def CheckPrereq(self):
9192
    """Check prerequisites.
9193

9194
    """
9195

    
9196
  def Exec(self, feedback_fn):
9197
    """Do the actual sleep.
9198

9199
    """
9200
    if self.op.on_master:
9201
      if not utils.TestDelay(self.op.duration):
9202
        raise errors.OpExecError("Error during master delay test")
9203
    if self.op.on_nodes:
9204
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9205
      for node, node_result in result.items():
9206
        node_result.Raise("Failure during rpc call to node %s" % node)
9207

    
9208

    
9209
class IAllocator(object):
9210
  """IAllocator framework.
9211

9212
  An IAllocator instance has the following sets of attributes:
9213
    - cfg that is needed to query the cluster
9214
    - input data (all members of the _KEYS class attribute are required)
9215
    - four buffer attributes (in|out_data|text), that represent the
9216
      input (to the external script) in text and data structure format,
9217
      and the output from it, again in two formats
9218
    - the result variables from the script (success, info, nodes) for
9219
      easy usage
9220

9221
  """
9222
  # pylint: disable-msg=R0902
9223
  # lots of instance attributes
9224
  _ALLO_KEYS = [
9225
    "name", "mem_size", "disks", "disk_template",
9226
    "os", "tags", "nics", "vcpus", "hypervisor",
9227
    ]
9228
  _RELO_KEYS = [
9229
    "name", "relocate_from",
9230
    ]
9231
  _EVAC_KEYS = [
9232
    "evac_nodes",
9233
    ]
9234

    
9235
  def __init__(self, cfg, rpc, mode, **kwargs):
9236
    self.cfg = cfg
9237
    self.rpc = rpc
9238
    # init buffer variables
9239
    self.in_text = self.out_text = self.in_data = self.out_data = None
9240
    # init all input fields so that pylint is happy
9241
    self.mode = mode
9242
    self.mem_size = self.disks = self.disk_template = None
9243
    self.os = self.tags = self.nics = self.vcpus = None
9244
    self.hypervisor = None
9245
    self.relocate_from = None
9246
    self.name = None
9247
    self.evac_nodes = None
9248
    # computed fields
9249
    self.required_nodes = None
9250
    # init result fields
9251
    self.success = self.info = self.result = None
9252
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9253
      keyset = self._ALLO_KEYS
9254
      fn = self._AddNewInstance
9255
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9256
      keyset = self._RELO_KEYS
9257
      fn = self._AddRelocateInstance
9258
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9259
      keyset = self._EVAC_KEYS
9260
      fn = self._AddEvacuateNodes
9261
    else:
9262
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9263
                                   " IAllocator" % self.mode)
9264
    for key in kwargs:
9265
      if key not in keyset:
9266
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9267
                                     " IAllocator" % key)
9268
      setattr(self, key, kwargs[key])
9269

    
9270
    for key in keyset:
9271
      if key not in kwargs:
9272
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9273
                                     " IAllocator" % key)
9274
    self._BuildInputData(fn)
9275

    
9276
  def _ComputeClusterData(self):
9277
    """Compute the generic allocator input data.
9278

9279
    This is the data that is independent of the actual operation.
9280

9281
    """
9282
    cfg = self.cfg
9283
    cluster_info = cfg.GetClusterInfo()
9284
    # cluster data
9285
    data = {
9286
      "version": constants.IALLOCATOR_VERSION,
9287
      "cluster_name": cfg.GetClusterName(),
9288
      "cluster_tags": list(cluster_info.GetTags()),
9289
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9290
      # we don't have job IDs
9291
      }
9292
    iinfo = cfg.GetAllInstancesInfo().values()
9293
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9294

    
9295
    # node data
9296
    node_results = {}
9297
    node_list = cfg.GetNodeList()
9298

    
9299
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9300
      hypervisor_name = self.hypervisor
9301
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9302
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9303
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9304
      hypervisor_name = cluster_info.enabled_hypervisors[0]
9305

    
9306
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9307
                                        hypervisor_name)
9308
    node_iinfo = \
9309
      self.rpc.call_all_instances_info(node_list,
9310
                                       cluster_info.enabled_hypervisors)
9311
    for nname, nresult in node_data.items():
9312
      # first fill in static (config-based) values
9313
      ninfo = cfg.GetNodeInfo(nname)
9314
      pnr = {
9315
        "tags": list(ninfo.GetTags()),
9316
        "primary_ip": ninfo.primary_ip,
9317
        "secondary_ip": ninfo.secondary_ip,
9318
        "offline": ninfo.offline,
9319
        "drained": ninfo.drained,
9320
        "master_candidate": ninfo.master_candidate,
9321
        }
9322

    
9323
      if not (ninfo.offline or ninfo.drained):
9324
        nresult.Raise("Can't get data for node %s" % nname)
9325
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9326
                                nname)
9327
        remote_info = nresult.payload
9328

    
9329
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
9330
                     'vg_size', 'vg_free', 'cpu_total']:
9331
          if attr not in remote_info:
9332
            raise errors.OpExecError("Node '%s' didn't return attribute"
9333
                                     " '%s'" % (nname, attr))
9334
          if not isinstance(remote_info[attr], int):
9335
            raise errors.OpExecError("Node '%s' returned invalid value"
9336
                                     " for '%s': %s" %
9337
                                     (nname, attr, remote_info[attr]))
9338
        # compute memory used by primary instances
9339
        i_p_mem = i_p_up_mem = 0
9340
        for iinfo, beinfo in i_list:
9341
          if iinfo.primary_node == nname:
9342
            i_p_mem += beinfo[constants.BE_MEMORY]
9343
            if iinfo.name not in node_iinfo[nname].payload:
9344
              i_used_mem = 0
9345
            else:
9346
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9347
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9348
            remote_info['memory_free'] -= max(0, i_mem_diff)
9349

    
9350
            if iinfo.admin_up:
9351
              i_p_up_mem += beinfo[constants.BE_MEMORY]
9352

    
9353
        # compute memory used by instances
9354
        pnr_dyn = {
9355
          "total_memory": remote_info['memory_total'],
9356
          "reserved_memory": remote_info['memory_dom0'],
9357
          "free_memory": remote_info['memory_free'],
9358
          "total_disk": remote_info['vg_size'],
9359
          "free_disk": remote_info['vg_free'],
9360
          "total_cpus": remote_info['cpu_total'],
9361
          "i_pri_memory": i_p_mem,
9362
          "i_pri_up_memory": i_p_up_mem,
9363
          }
9364
        pnr.update(pnr_dyn)
9365

    
9366
      node_results[nname] = pnr
9367
    data["nodes"] = node_results
9368

    
9369
    # instance data
9370
    instance_data = {}
9371
    for iinfo, beinfo in i_list:
9372
      nic_data = []
9373
      for nic in iinfo.nics:
9374
        filled_params = objects.FillDict(
9375
            cluster_info.nicparams[constants.PP_DEFAULT],
9376
            nic.nicparams)
9377
        nic_dict = {"mac": nic.mac,
9378
                    "ip": nic.ip,
9379
                    "mode": filled_params[constants.NIC_MODE],
9380
                    "link": filled_params[constants.NIC_LINK],
9381
                   }
9382
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9383
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9384
        nic_data.append(nic_dict)
9385
      pir = {
9386
        "tags": list(iinfo.GetTags()),
9387
        "admin_up": iinfo.admin_up,
9388
        "vcpus": beinfo[constants.BE_VCPUS],
9389
        "memory": beinfo[constants.BE_MEMORY],
9390
        "os": iinfo.os,
9391
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9392
        "nics": nic_data,
9393
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9394
        "disk_template": iinfo.disk_template,
9395
        "hypervisor": iinfo.hypervisor,
9396
        }
9397
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9398
                                                 pir["disks"])
9399
      instance_data[iinfo.name] = pir
9400

    
9401
    data["instances"] = instance_data
9402

    
9403
    self.in_data = data
9404

    
9405
  def _AddNewInstance(self):
9406
    """Add new instance data to allocator structure.
9407

9408
    This in combination with _ComputeClusterData will create the
9409
    correct structure needed as input for the allocator.
9410

9411
    The checks for the completeness of the opcode must have already been
9412
    done.
9413

9414
    """
9415
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9416

    
9417
    if self.disk_template in constants.DTS_NET_MIRROR:
9418
      self.required_nodes = 2
9419
    else:
9420
      self.required_nodes = 1
9421
    request = {
9422
      "name": self.name,
9423
      "disk_template": self.disk_template,
9424
      "tags": self.tags,
9425
      "os": self.os,
9426
      "vcpus": self.vcpus,
9427
      "memory": self.mem_size,
9428
      "disks": self.disks,
9429
      "disk_space_total": disk_space,
9430
      "nics": self.nics,
9431
      "required_nodes": self.required_nodes,
9432
      }
9433
    return request
9434

    
9435
  def _AddRelocateInstance(self):
9436
    """Add relocate instance data to allocator structure.
9437

9438
    This in combination with _ComputeClusterData will create the
9439
    correct structure needed as input for the allocator.
9440

9441
    The checks for the completeness of the opcode must have already been
9442
    done.
9443

9444
    """
9445
    instance = self.cfg.GetInstanceInfo(self.name)
9446
    if instance is None:
9447
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
9448
                                   " IAllocator" % self.name)
9449

    
9450
    if instance.disk_template not in constants.DTS_NET_MIRROR:
9451
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9452
                                 errors.ECODE_INVAL)
9453

    
9454
    if len(instance.secondary_nodes) != 1:
9455
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
9456
                                 errors.ECODE_STATE)
9457

    
9458
    self.required_nodes = 1
9459
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
9460
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9461

    
9462
    request = {
9463
      "name": self.name,
9464
      "disk_space_total": disk_space,
9465
      "required_nodes": self.required_nodes,
9466
      "relocate_from": self.relocate_from,
9467
      }
9468
    return request
9469

    
9470
  def _AddEvacuateNodes(self):
9471
    """Add evacuate nodes data to allocator structure.
9472

9473
    """
9474
    request = {
9475
      "evac_nodes": self.evac_nodes
9476
      }
9477
    return request
9478

    
9479
  def _BuildInputData(self, fn):
9480
    """Build input data structures.
9481

9482
    """
9483
    self._ComputeClusterData()
9484

    
9485
    request = fn()
9486
    request["type"] = self.mode
9487
    self.in_data["request"] = request
9488

    
9489
    self.in_text = serializer.Dump(self.in_data)
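    # At this point self.in_data has the structure built above, roughly:
    #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
    #    "enabled_hypervisors": [...], "nodes": {...}, "instances": {...},
    #    "request": {...}}
    # and self.in_text is the serialized form handed to the allocator script.
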
  def Run(self, name, validate=True, call_fn=None):
9492
    """Run an instance allocator and return the results.
9493

9494
    """
9495
    if call_fn is None:
9496
      call_fn = self.rpc.call_iallocator_runner
9497

    
9498
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9499
    result.Raise("Failure while running the iallocator script")
9500

    
9501
    self.out_text = result.payload
9502
    if validate:
9503
      self._ValidateResult()
9504

    
9505
  def _ValidateResult(self):
9506
    """Process the allocator results.
9507

9508
    This will process and if successful save the result in
9509
    self.out_data and the other parameters.
9510

9511
    """
9512
    try:
9513
      rdict = serializer.Load(self.out_text)
9514
    except Exception, err:
9515
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9516

    
9517
    if not isinstance(rdict, dict):
9518
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
9519

    
9520
    # TODO: remove backwards compatibility in later versions
9521
    if "nodes" in rdict and "result" not in rdict:
9522
      rdict["result"] = rdict["nodes"]
9523
      del rdict["nodes"]
9524

    
9525
    for key in "success", "info", "result":
9526
      if key not in rdict:
9527
        raise errors.OpExecError("Can't parse iallocator results:"
9528
                                 " missing key '%s'" % key)
9529
      setattr(self, key, rdict[key])
9530

    
9531
    if not isinstance(rdict["result"], list):
9532
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9533
                               " is not a list")
9534
    self.out_data = rdict
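
    # Illustrative example of a reply accepted by the checks above (shown as
    # the parsed dict; values are hypothetical):
    #   {"success": True, "info": "allocation successful",
    #    "result": ["node1.example.com", "node2.example.com"]}

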
class LUTestAllocator(NoHooksLU):
9538
  """Run allocator tests.
9539

9540
  This LU runs the allocator tests
9541

9542
  """
9543
  _OP_REQP = ["direction", "mode", "name"]
9544

    
9545
  def CheckPrereq(self):
9546
    """Check prerequisites.
9547

9548
    This checks the opcode parameters depending on the direction and mode.
9549

9550
    """
9551
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9552
      for attr in ["name", "mem_size", "disks", "disk_template",
9553
                   "os", "tags", "nics", "vcpus"]:
9554
        if not hasattr(self.op, attr):
9555
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9556
                                     attr, errors.ECODE_INVAL)
9557
      iname = self.cfg.ExpandInstanceName(self.op.name)
9558
      if iname is not None:
9559
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9560
                                   iname, errors.ECODE_EXISTS)
9561
      if not isinstance(self.op.nics, list):
9562
        raise errors.OpPrereqError("Invalid parameter 'nics'",
9563
                                   errors.ECODE_INVAL)
9564
      for row in self.op.nics:
9565
        if (not isinstance(row, dict) or
9566
            "mac" not in row or
9567
            "ip" not in row or
9568
            "bridge" not in row):
9569
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
9570
                                     " parameter", errors.ECODE_INVAL)
9571
      if not isinstance(self.op.disks, list):
9572
        raise errors.OpPrereqError("Invalid parameter 'disks'",
9573
                                   errors.ECODE_INVAL)
9574
      for row in self.op.disks:
9575
        if (not isinstance(row, dict) or
9576
            "size" not in row or
9577
            not isinstance(row["size"], int) or
9578
            "mode" not in row or
9579
            row["mode"] not in ['r', 'w']):
9580
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
9581
                                     " parameter", errors.ECODE_INVAL)
9582
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9583
        self.op.hypervisor = self.cfg.GetHypervisorType()
9584
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9585
      if not hasattr(self.op, "name"):
9586
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9587
                                   errors.ECODE_INVAL)
9588
      fname = _ExpandInstanceName(self.cfg, self.op.name)
9589
      self.op.name = fname
9590
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9591
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9592
      if not hasattr(self.op, "evac_nodes"):
9593
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9594
                                   " opcode input", errors.ECODE_INVAL)
9595
    else:
9596
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9597
                                 self.op.mode, errors.ECODE_INVAL)
9598

    
9599
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9600
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
9601
        raise errors.OpPrereqError("Missing allocator name",
9602
                                   errors.ECODE_INVAL)
9603
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9604
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
9605
                                 self.op.direction, errors.ECODE_INVAL)
9606

    
9607
  def Exec(self, feedback_fn):
9608
    """Run the allocator test.
9609

9610
    """
9611
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9612
      ial = IAllocator(self.cfg, self.rpc,
9613
                       mode=self.op.mode,
9614
                       name=self.op.name,
9615
                       mem_size=self.op.mem_size,
9616
                       disks=self.op.disks,
9617
                       disk_template=self.op.disk_template,
9618
                       os=self.op.os,
9619
                       tags=self.op.tags,
9620
                       nics=self.op.nics,
9621
                       vcpus=self.op.vcpus,
9622
                       hypervisor=self.op.hypervisor,
9623
                       )
9624
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9625
      ial = IAllocator(self.cfg, self.rpc,
9626
                       mode=self.op.mode,
9627
                       name=self.op.name,
9628
                       relocate_from=list(self.relocate_from),
9629
                       )
9630
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9631
      ial = IAllocator(self.cfg, self.rpc,
9632
                       mode=self.op.mode,
9633
                       evac_nodes=self.op.evac_nodes)
9634
    else:
9635
      raise errors.ProgrammerError("Uncatched mode %s in"
9636
                                   " LUTestAllocator.Exec", self.op.mode)
9637

    
9638
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
9639
      result = ial.in_text
9640
    else:
9641
      ial.Run(self.op.allocator, validate=False)
9642
      result = ial.out_text
9643
    return result