#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If no nodes are to be returned, an empty list (and not None) should be
    used.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


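# Illustrative sketch only (not used anywhere in this module): a hypothetical
# LU following the rules documented in LogicalUnit above - it redefines
# HPATH/HTYPE, expands and locks its target in ExpandNames, and implements
# CheckPrereq, Exec and BuildHooksEnv.
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = "example-noop"
#     HTYPE = constants.HTYPE_INSTANCE
#     _OP_REQP = ["instance_name"]
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
#
#     def BuildHooksEnv(self):
#       env = _BuildInstanceHookEnvByObject(self, self.instance)
#       return env, [self.cfg.GetMasterNode()], [self.instance.primary_node]
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do for %s" % self.instance.name)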
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


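# Illustrative sketch only (not part of the module): how a tasklet-based LU is
# typically wired up. The LU handles locking in ExpandNames and fills
# self.tasklets; the base class CheckPrereq and Exec then iterate over them.
#
#   class TaskletExampleNoop(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do for %s" % self.instance.name)
#
#   # ...and inside some LU's ExpandNames, after declaring the needed locks:
#   #   self.tasklets = [TaskletExampleNoop(self, name) for name in names]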
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: non-empty list of node names to check and expand
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is of a wrong type
  @raise errors.ProgrammerError: if the nodes list is empty

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()



def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


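# For reference, a hypothetical call such as
#   _BuildInstanceHookEnv("inst1.example.com", "node1", ["node2"], "debian",
#                         True, 512, 1,
#                         [("198.51.100.10", "aa:00:00:00:00:01",
#                           constants.NIC_MODE_BRIDGED, "br0")],
#                         "drbd", [(10240, "rw")], {}, {}, "xen-pvm")
# yields, among others, OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY,
# INSTANCE_SECONDARIES, INSTANCE_STATUS ("up"), INSTANCE_NIC_COUNT,
# INSTANCE_NIC0_IP/_MAC/_MODE/_LINK/_BRIDGE, INSTANCE_DISK_COUNT and
# INSTANCE_DISK0_SIZE/_MODE; the hooks runner later adds the "GANETI_" prefix
# to every key (all values here are made up for illustration).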
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


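# Note on the naming convention checked above: a variant-aware OS is requested
# as "<os>+<variant>" (for example a hypothetical "debootstrap+default"); the
# text after the first '+' must appear in the OS object's supported_variants.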
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _FormatTimestamp(secs):
  """Formats a Unix timestamp with the local timezone.

  """
  return time.strftime("%F %T %Z", time.gmtime(secs))


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)


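# How the thresholds above interact, assuming illustrative values of
# warn_days=30 and error_days=7: a certificate expiring in 40 days yields
# (None, None), one expiring in 10 days an ETYPE_WARNING entry, one expiring
# in 3 days an ETYPE_ERROR entry, while an expired or not-yet-valid
# certificate is reported independently of the thresholds.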
def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

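  # Example of the two formats produced by _Error/_ErrorIf above for the same
  # (hypothetical) problem, depending on the opcode's error_codes flag:
  #   with error_codes:
  #     ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  #   without error_codes:
  #     ERROR: node node1.example.com: unable to check volume groups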
  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)


    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

1464
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

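  # The drbd_map argument used below is keyed by node name and maps each
  # reserved DRBD minor to the owning instance name, roughly
  # {"node1.example.com": {0: "inst1", 1: "inst2"}} (illustrative values);
  # this is compared against the minors the node actually reports as in use.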
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

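  # mfree and dfree below are filled from the node's hvinfo and vglist
  # replies; both are expected to be plain integers (free memory and free
  # volume group space), which is why non-numeric payloads are reported as
  # node problems rather than silently ignored.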
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

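  # Rough flow of Exec below: check the configuration and certificates, build
  # the expected cluster state (node_image, node_vol_should), issue a single
  # node_verify RPC to all nodes, then run the per-node and per-instance
  # checks against the collected results and report the notices.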
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

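  # As an illustration, the tuple returned by Exec below could look like
  # ({"node2.example.com": "rpc error"}, ["inst1"],
  #  {"inst2": [("node1.example.com", "xenvg/disk0_data")]}): per-node
  # errors, instances needing activate-disks, and missing (node, LV) pairs.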
  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
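        # the node presumably reports sizes in bytes while disk.size is
        # kept in MiB, hence the shift by 20 bits before comparing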
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


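# LURenameCluster below follows a stop/update/restart pattern: the master IP
# is taken down first, the cluster name and IP are updated in the
# configuration and the known_hosts file is redistributed, and the master
# role is re-enabled in the finally block even if the update fails.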
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


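# _RecursiveCheckIfLVMBased walks the disk tree depth-first; e.g. a DRBD8
# disk whose children are two LV devices is reported as lvm-based because
# one of the children matches constants.LD_LV (illustrative example).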
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

    _CheckBooleanOpField(self.op, "maintain_node_health")

    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)


  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


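# _WaitForSync polls the primary node for mirror status: RPC failures are
# retried up to ten times with a six-second pause, a sync in progress is
# re-polled after min(60, estimated_time) seconds, and a "done but degraded"
# state gets a few one-second retries before being reported as degraded.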
def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

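    # An OS is considered valid only if its first entry is valid on every
    # node that responded, and the reported variants are intersected across
    # nodes, so a variant missing anywhere drops out of the final list.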
    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          valid = valid and osl and osl[0][1]
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


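# Node removal below is ordered so the cluster stays consistent: the
# candidate pool is re-adjusted and the node dropped from the configuration
# first, then the post hooks run on the node, and only afterwards is
# node_leave_cluster called to stop the daemon and clean up the node itself.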
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)


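# LUQueryNodes only contacts the nodes (via the node_info RPC) when a
# requested output field is not in _FIELDS_STATIC; purely static queries are
# answered from the configuration and need no node locks.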
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2972
                             "sinst_cnt", "sinst_list"))
2973
    if inst_fields & frozenset(self.op.output_fields):
2974
      inst_data = self.cfg.GetAllInstancesInfo()
2975

    
2976
      for inst in inst_data.values():
2977
        if inst.primary_node in node_to_primary:
2978
          node_to_primary[inst.primary_node].add(inst.name)
2979
        for secnode in inst.secondary_nodes:
2980
          if secnode in node_to_secondary:
2981
            node_to_secondary[secnode].add(inst.name)
2982

    
2983
    master_node = self.cfg.GetMasterNode()
2984

    
2985
    # end data gathering
2986

    
2987
    output = []
2988
    for node in nodelist:
2989
      node_output = []
2990
      for field in self.op.output_fields:
2991
        if field in self._SIMPLE_FIELDS:
2992
          val = getattr(node, field)
2993
        elif field == "pinst_list":
2994
          val = list(node_to_primary[node.name])
2995
        elif field == "sinst_list":
2996
          val = list(node_to_secondary[node.name])
2997
        elif field == "pinst_cnt":
2998
          val = len(node_to_primary[node.name])
2999
        elif field == "sinst_cnt":
3000
          val = len(node_to_secondary[node.name])
3001
        elif field == "pip":
3002
          val = node.primary_ip
3003
        elif field == "sip":
3004
          val = node.secondary_ip
3005
        elif field == "tags":
3006
          val = list(node.GetTags())
3007
        elif field == "master":
3008
          val = node.name == master_node
3009
        elif self._FIELDS_DYNAMIC.Matches(field):
3010
          val = live_data[node.name].get(field, None)
3011
        elif field == "role":
3012
          if node.name == master_node:
3013
            val = "M"
3014
          elif node.master_candidate:
3015
            val = "C"
3016
          elif node.drained:
3017
            val = "D"
3018
          elif node.offline:
3019
            val = "O"
3020
          else:
3021
            val = "R"
3022
        else:
3023
          raise errors.ParameterError(field)
3024
        node_output.append(val)
3025
      output.append(node_output)
3026

    
3027
    return output
3028

    
3029

    
3030
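# NOTE (editorial summary): LUQueryNodes above splits the requested output
# fields into two groups.  Static fields (_FIELDS_STATIC) are answered purely
# from the cluster configuration; dynamic fields (_FIELDS_DYNAMIC) such as
# mfree/dfree/ctotal require a call_node_info RPC to every queried node, which
# is why do_node_query (and, with use_locking, node locking) is only enabled
# when at least one non-static field is selected.  As a hypothetical example,
# a query for ["name", "mfree"] triggers the RPC, while ["name", "pip"] is
# served from the configuration alone.

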
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    _CheckStorageType(self.op.storage_type)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


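# NOTE (editorial summary): LUAddNode.CheckPrereq above requires the new node
# to be homed the same way as the master: either both use a single IP
# (primary == secondary) or both are dual-homed with a distinct secondary IP.
# As a hypothetical example (addresses are illustrative only), if the master
# has primary 192.0.2.10 and secondary 10.0.0.10, a node offered with only a
# primary IP is rejected before any SSH key transfer or RPC setup is
# attempted.

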
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested node state changes for consistency.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


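# NOTE (editorial summary): the flag handling in LUSetNodeParams above boils
# down to: at least one of offline/drained/master_candidate must be given, at
# most one of them may be True in a single request, the master node's own
# flags can only be changed via master-failover, and demoting one of the last
# master candidates requires auto_promote so another node can be promoted in
# its place.  For example (hypothetical requests, shown only as an
# illustration):
#
#   offline=True                  -> accepted; node is also demoted from MC
#   offline=True, drained=True    -> rejected ("more than one state")
#   master_candidate=False        -> accepted only if enough MCs remain,
#                                    or if auto_promote is set

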
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Return the requested configuration values.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info), where device_info is a
      list of (host, instance_visible_name, node_visible_name) with the
      mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


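# NOTE (editorial example): a minimal, hypothetical sketch of how callers
# consume the (disks_ok, device_info) tuple returned above (see
# LUActivateInstanceDisks.Exec earlier in this file for a real caller).  This
# helper is illustrative only and is not invoked anywhere in the module.
def _ExampleLogAssembledDisks(lu, instance):
  """Assemble an instance's disks and log the primary-node device paths."""
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    # each entry maps an instance-visible disk name to its node device path
    logging.info("Disk %s of %s is visible on %s as %s",
                 iv_name, instance.name, node, dev_path)
  return device_info

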
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored; if it is true, only errors on secondary nodes count.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


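# NOTE (editorial example): a hypothetical illustration of the call pattern
# used by the start-up path in this module (see LUStartupInstance.CheckPrereq
# below for a real caller).  This helper is not invoked anywhere and only
# shows how the parameters of _CheckNodeFreeMemory fit together.
def _ExampleCheckStartupMemory(lu, instance, mem_size):
  """Verify the primary node can hold mem_size MiB for this instance."""
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       mem_size, instance.hypervisor)

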
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


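# NOTE (editorial example): a hypothetical sketch of how a disk-consuming
# operation could pre-check space on all nodes of an instance with the helper
# above; the helper name and amount are illustrative and this function is not
# invoked anywhere in the module.
def _ExampleCheckDiskSpaceForGrow(lu, instance, amount_mib):
  """Check that every node of the instance has amount_mib MiB free."""
  _CheckNodesFreeDisk(lu, instance.all_nodes, amount_mib)

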
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
4323
  """Reinstall an instance.
4324

4325
  """
4326
  HPATH = "instance-reinstall"
4327
  HTYPE = constants.HTYPE_INSTANCE
4328
  _OP_REQP = ["instance_name"]
4329
  REQ_BGL = False
4330

    
4331
  def ExpandNames(self):
4332
    self._ExpandAndLockInstance()
4333

    
4334
  def BuildHooksEnv(self):
4335
    """Build hooks env.
4336

4337
    This runs on master, primary and secondary nodes of the instance.
4338

4339
    """
4340
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4341
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4342
    return env, nl, nl
4343

    
4344
  def CheckPrereq(self):
4345
    """Check prerequisites.
4346

4347
    This checks that the instance is in the cluster and is not running.
4348

4349
    """
4350
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4351
    assert instance is not None, \
4352
      "Cannot retrieve locked instance %s" % self.op.instance_name
4353
    _CheckNodeOnline(self, instance.primary_node)
4354

    
4355
    if instance.disk_template == constants.DT_DISKLESS:
4356
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4357
                                 self.op.instance_name,
4358
                                 errors.ECODE_INVAL)
4359
    _CheckInstanceDown(self, instance, "cannot reinstall")
4360

    
4361
    self.op.os_type = getattr(self.op, "os_type", None)
4362
    self.op.force_variant = getattr(self.op, "force_variant", False)
4363
    if self.op.os_type is not None:
4364
      # OS verification
4365
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4366
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4367

    
4368
    self.instance = instance
4369

    
4370
  def Exec(self, feedback_fn):
4371
    """Reinstall the instance.
4372

4373
    """
4374
    inst = self.instance
4375

    
4376
    if self.op.os_type is not None:
4377
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4378
      inst.os = self.op.os_type
4379
      self.cfg.Update(inst, feedback_fn)
4380

    
4381
    _StartInstanceDisks(self, inst, None)
4382
    try:
4383
      feedback_fn("Running the instance OS create scripts...")
4384
      # FIXME: pass debug option from opcode to backend
4385
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4386
                                             self.op.debug_level)
4387
      result.Raise("Could not install OS for instance %s on node %s" %
4388
                   (inst.name, inst.primary_node))
4389
    finally:
4390
      _ShutdownInstanceDisks(self, inst)
4391

    
4392

    
4393
class LURecreateInstanceDisks(LogicalUnit):
4394
  """Recreate an instance's missing disks.
4395

4396
  """
4397
  HPATH = "instance-recreate-disks"
4398
  HTYPE = constants.HTYPE_INSTANCE
4399
  _OP_REQP = ["instance_name", "disks"]
4400
  REQ_BGL = False
4401

    
4402
  def CheckArguments(self):
4403
    """Check the arguments.
4404

4405
    """
4406
    if not isinstance(self.op.disks, list):
4407
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4408
    for item in self.op.disks:
4409
      if (not isinstance(item, int) or
4410
          item < 0):
4411
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
4412
                                   str(item), errors.ECODE_INVAL)
4413

    
4414
  def ExpandNames(self):
4415
    self._ExpandAndLockInstance()
4416

    
4417
  def BuildHooksEnv(self):
4418
    """Build hooks env.
4419

4420
    This runs on master, primary and secondary nodes of the instance.
4421

4422
    """
4423
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4424
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4425
    return env, nl, nl
4426

    
4427
  def CheckPrereq(self):
4428
    """Check prerequisites.
4429

4430
    This checks that the instance is in the cluster and is not running.
4431

4432
    """
4433
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4434
    assert instance is not None, \
4435
      "Cannot retrieve locked instance %s" % self.op.instance_name
4436
    _CheckNodeOnline(self, instance.primary_node)
4437

    
4438
    if instance.disk_template == constants.DT_DISKLESS:
4439
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4440
                                 self.op.instance_name, errors.ECODE_INVAL)
4441
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4442

    
4443
    if not self.op.disks:
4444
      self.op.disks = range(len(instance.disks))
4445
    else:
4446
      for idx in self.op.disks:
4447
        if idx >= len(instance.disks):
4448
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4449
                                     errors.ECODE_INVAL)
4450

    
4451
    self.instance = instance
4452

    
4453
  def Exec(self, feedback_fn):
4454
    """Recreate the disks.
4455

4456
    """
4457
    to_skip = []
4458
    for idx, _ in enumerate(self.instance.disks):
4459
      if idx not in self.op.disks: # disk idx has not been passed in
4460
        to_skip.append(idx)
4461
        continue
4462

    
4463
    _CreateDisks(self, self.instance, to_skip=to_skip)
4464

    
4465

    
4466
class LURenameInstance(LogicalUnit):
4467
  """Rename an instance.
4468

4469
  """
4470
  HPATH = "instance-rename"
4471
  HTYPE = constants.HTYPE_INSTANCE
4472
  _OP_REQP = ["instance_name", "new_name"]
4473

    
4474
  def BuildHooksEnv(self):
4475
    """Build hooks env.
4476

4477
    This runs on master, primary and secondary nodes of the instance.
4478

4479
    """
4480
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4481
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4482
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4483
    return env, nl, nl
4484

    
4485
  def CheckPrereq(self):
4486
    """Check prerequisites.
4487

4488
    This checks that the instance is in the cluster and is not running.
4489

4490
    """
4491
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4492
                                                self.op.instance_name)
4493
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4494
    assert instance is not None
4495
    _CheckNodeOnline(self, instance.primary_node)
4496
    _CheckInstanceDown(self, instance, "cannot rename")
4497
    self.instance = instance
4498

    
4499
    # new name verification
4500
    name_info = utils.GetHostInfo(self.op.new_name)
4501

    
4502
    self.op.new_name = new_name = name_info.name
4503
    instance_list = self.cfg.GetInstanceList()
4504
    if new_name in instance_list:
4505
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4506
                                 new_name, errors.ECODE_EXISTS)
4507

    
4508
    if not getattr(self.op, "ignore_ip", False):
4509
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4510
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4511
                                   (name_info.ip, new_name),
4512
                                   errors.ECODE_NOTUNIQUE)
4513

    
4514

    
4515
  def Exec(self, feedback_fn):
4516
    """Reinstall the instance.
4517

4518
    """
4519
    inst = self.instance
4520
    old_name = inst.name
4521

    
4522
    if inst.disk_template == constants.DT_FILE:
4523
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4524

    
4525
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4526
    # Change the instance lock. This is definitely safe while we hold the BGL
4527
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4528
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4529

    
4530
    # re-read the instance from the configuration after rename
4531
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4532

    
4533
    if inst.disk_template == constants.DT_FILE:
4534
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4535
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4536
                                                     old_file_storage_dir,
4537
                                                     new_file_storage_dir)
4538
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4539
                   " (but the instance has been renamed in Ganeti)" %
4540
                   (inst.primary_node, old_file_storage_dir,
4541
                    new_file_storage_dir))
4542

    
4543
    _StartInstanceDisks(self, inst, None)
4544
    try:
4545
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4546
                                                 old_name, self.op.debug_level)
4547
      msg = result.fail_msg
4548
      if msg:
4549
        msg = ("Could not run OS rename script for instance %s on node %s"
4550
               " (but the instance has been renamed in Ganeti): %s" %
4551
               (inst.name, inst.primary_node, msg))
4552
        self.proc.LogWarning(msg)
4553
    finally:
4554
      _ShutdownInstanceDisks(self, inst)
4555

    
4556

    
4557
class LURemoveInstance(LogicalUnit):
4558
  """Remove an instance.
4559

4560
  """
4561
  HPATH = "instance-remove"
4562
  HTYPE = constants.HTYPE_INSTANCE
4563
  _OP_REQP = ["instance_name", "ignore_failures"]
4564
  REQ_BGL = False
4565

    
4566
  def CheckArguments(self):
4567
    """Check the arguments.
4568

4569
    """
4570
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4571
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4572

    
4573
  def ExpandNames(self):
4574
    self._ExpandAndLockInstance()
4575
    self.needed_locks[locking.LEVEL_NODE] = []
4576
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4577

    
4578
  def DeclareLocks(self, level):
4579
    if level == locking.LEVEL_NODE:
4580
      self._LockInstancesNodes()
4581

    
4582
  def BuildHooksEnv(self):
4583
    """Build hooks env.
4584

4585
    This runs on master, primary and secondary nodes of the instance.
4586

4587
    """
4588
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4589
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4590
    nl = [self.cfg.GetMasterNode()]
4591
    nl_post = list(self.instance.all_nodes) + nl
4592
    return env, nl, nl_post
4593

    
4594
  def CheckPrereq(self):
4595
    """Check prerequisites.
4596

4597
    This checks that the instance is in the cluster.
4598

4599
    """
4600
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4601
    assert self.instance is not None, \
4602
      "Cannot retrieve locked instance %s" % self.op.instance_name
4603

    
4604
  def Exec(self, feedback_fn):
4605
    """Remove the instance.
4606

4607
    """
4608
    instance = self.instance
4609
    logging.info("Shutting down instance %s on node %s",
4610
                 instance.name, instance.primary_node)
4611

    
4612
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4613
                                             self.shutdown_timeout)
4614
    msg = result.fail_msg
4615
    if msg:
4616
      if self.op.ignore_failures:
4617
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4618
      else:
4619
        raise errors.OpExecError("Could not shutdown instance %s on"
4620
                                 " node %s: %s" %
4621
                                 (instance.name, instance.primary_node, msg))
4622

    
4623
    logging.info("Removing block devices for instance %s", instance.name)
4624

    
4625
    if not _RemoveDisks(self, instance):
4626
      if self.op.ignore_failures:
4627
        feedback_fn("Warning: can't remove instance's disks")
4628
      else:
4629
        raise errors.OpExecError("Can't remove instance's disks")
4630

    
4631
    logging.info("Removing instance %s out of cluster config", instance.name)
4632

    
4633
    self.cfg.RemoveInstance(instance.name)
4634
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4635

    
4636

    
4637
class LUQueryInstances(NoHooksLU):
4638
  """Logical unit for querying instances.
4639

4640
  """
4641
  # pylint: disable-msg=W0142
4642
  _OP_REQP = ["output_fields", "names", "use_locking"]
4643
  REQ_BGL = False
4644
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4645
                    "serial_no", "ctime", "mtime", "uuid"]
4646
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4647
                                    "admin_state",
4648
                                    "disk_template", "ip", "mac", "bridge",
4649
                                    "nic_mode", "nic_link",
4650
                                    "sda_size", "sdb_size", "vcpus", "tags",
4651
                                    "network_port", "beparams",
4652
                                    r"(disk)\.(size)/([0-9]+)",
4653
                                    r"(disk)\.(sizes)", "disk_usage",
4654
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4655
                                    r"(nic)\.(bridge)/([0-9]+)",
4656
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4657
                                    r"(disk|nic)\.(count)",
4658
                                    "hvparams",
4659
                                    ] + _SIMPLE_FIELDS +
4660
                                  ["hv/%s" % name
4661
                                   for name in constants.HVS_PARAMETERS
4662
                                   if name not in constants.HVC_GLOBALS] +
4663
                                  ["be/%s" % name
4664
                                   for name in constants.BES_PARAMETERS])
4665
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4666

    
4667

    
4668
  def ExpandNames(self):
4669
    _CheckOutputFields(static=self._FIELDS_STATIC,
4670
                       dynamic=self._FIELDS_DYNAMIC,
4671
                       selected=self.op.output_fields)
4672

    
4673
    self.needed_locks = {}
4674
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4675
    self.share_locks[locking.LEVEL_NODE] = 1
4676

    
4677
    if self.op.names:
4678
      self.wanted = _GetWantedInstances(self, self.op.names)
4679
    else:
4680
      self.wanted = locking.ALL_SET
4681

    
4682
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4683
    self.do_locking = self.do_node_query and self.op.use_locking
4684
    if self.do_locking:
4685
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4686
      self.needed_locks[locking.LEVEL_NODE] = []
4687
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4688

    
4689
  def DeclareLocks(self, level):
4690
    if level == locking.LEVEL_NODE and self.do_locking:
4691
      self._LockInstancesNodes()
4692

    
4693
  def CheckPrereq(self):
4694
    """Check prerequisites.
4695

4696
    """
4697
    pass
4698

    
4699
  def Exec(self, feedback_fn):
4700
    """Computes the list of nodes and their attributes.
4701

4702
    """
4703
    # pylint: disable-msg=R0912
4704
    # way too many branches here
4705
    all_info = self.cfg.GetAllInstancesInfo()
4706
    if self.wanted == locking.ALL_SET:
4707
      # caller didn't specify instance names, so ordering is not important
4708
      if self.do_locking:
4709
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4710
      else:
4711
        instance_names = all_info.keys()
4712
      instance_names = utils.NiceSort(instance_names)
4713
    else:
4714
      # caller did specify names, so we must keep the ordering
4715
      if self.do_locking:
4716
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4717
      else:
4718
        tgt_set = all_info.keys()
4719
      missing = set(self.wanted).difference(tgt_set)
4720
      if missing:
4721
        raise errors.OpExecError("Some instances were removed before"
4722
                                 " retrieving their data: %s" % missing)
4723
      instance_names = self.wanted
4724

    
4725
    instance_list = [all_info[iname] for iname in instance_names]
4726

    
4727
    # begin data gathering
4728

    
4729
    nodes = frozenset([inst.primary_node for inst in instance_list])
4730
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4731

    
4732
    bad_nodes = []
4733
    off_nodes = []
4734
    if self.do_node_query:
4735
      live_data = {}
4736
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4737
      for name in nodes:
4738
        result = node_data[name]
4739
        if result.offline:
4740
          # offline nodes will be in both lists
4741
          off_nodes.append(name)
4742
        if result.fail_msg:
4743
          bad_nodes.append(name)
4744
        else:
4745
          if result.payload:
4746
            live_data.update(result.payload)
4747
          # else no instance is alive
4748
    else:
4749
      live_data = dict([(name, {}) for name in instance_names])
4750

    
4751
    # end data gathering
4752

    
4753
    HVPREFIX = "hv/"
4754
    BEPREFIX = "be/"
4755
    output = []
4756
    cluster = self.cfg.GetClusterInfo()
4757
    for instance in instance_list:
4758
      iout = []
4759
      i_hv = cluster.FillHV(instance, skip_globals=True)
4760
      i_be = cluster.FillBE(instance)
4761
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4762
                                 nic.nicparams) for nic in instance.nics]
4763
      for field in self.op.output_fields:
4764
        st_match = self._FIELDS_STATIC.Matches(field)
4765
        if field in self._SIMPLE_FIELDS:
4766
          val = getattr(instance, field)
4767
        elif field == "pnode":
4768
          val = instance.primary_node
4769
        elif field == "snodes":
4770
          val = list(instance.secondary_nodes)
4771
        elif field == "admin_state":
4772
          val = instance.admin_up
4773
        elif field == "oper_state":
4774
          if instance.primary_node in bad_nodes:
4775
            val = None
4776
          else:
4777
            val = bool(live_data.get(instance.name))
4778
        elif field == "status":
4779
          if instance.primary_node in off_nodes:
4780
            val = "ERROR_nodeoffline"
4781
          elif instance.primary_node in bad_nodes:
4782
            val = "ERROR_nodedown"
4783
          else:
4784
            running = bool(live_data.get(instance.name))
4785
            if running:
4786
              if instance.admin_up:
4787
                val = "running"
4788
              else:
4789
                val = "ERROR_up"
4790
            else:
4791
              if instance.admin_up:
4792
                val = "ERROR_down"
4793
              else:
4794
                val = "ADMIN_down"
4795
        elif field == "oper_ram":
4796
          if instance.primary_node in bad_nodes:
4797
            val = None
4798
          elif instance.name in live_data:
4799
            val = live_data[instance.name].get("memory", "?")
4800
          else:
4801
            val = "-"
4802
        elif field == "vcpus":
4803
          val = i_be[constants.BE_VCPUS]
4804
        elif field == "disk_template":
4805
          val = instance.disk_template
4806
        elif field == "ip":
4807
          if instance.nics:
4808
            val = instance.nics[0].ip
4809
          else:
4810
            val = None
4811
        elif field == "nic_mode":
4812
          if instance.nics:
4813
            val = i_nicp[0][constants.NIC_MODE]
4814
          else:
4815
            val = None
4816
        elif field == "nic_link":
4817
          if instance.nics:
4818
            val = i_nicp[0][constants.NIC_LINK]
4819
          else:
4820
            val = None
4821
        elif field == "bridge":
4822
          if (instance.nics and
4823
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4824
            val = i_nicp[0][constants.NIC_LINK]
4825
          else:
4826
            val = None
4827
        elif field == "mac":
4828
          if instance.nics:
4829
            val = instance.nics[0].mac
4830
          else:
4831
            val = None
4832
        elif field == "sda_size" or field == "sdb_size":
4833
          idx = ord(field[2]) - ord('a')
4834
          try:
4835
            val = instance.FindDisk(idx).size
4836
          except errors.OpPrereqError:
4837
            val = None
4838
        elif field == "disk_usage": # total disk usage per node
4839
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4840
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4841
        elif field == "tags":
4842
          val = list(instance.GetTags())
4843
        elif field == "hvparams":
4844
          val = i_hv
4845
        elif (field.startswith(HVPREFIX) and
4846
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4847
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4848
          val = i_hv.get(field[len(HVPREFIX):], None)
4849
        elif field == "beparams":
4850
          val = i_be
4851
        elif (field.startswith(BEPREFIX) and
4852
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4853
          val = i_be.get(field[len(BEPREFIX):], None)
4854
        elif st_match and st_match.groups():
4855
          # matches a variable list
4856
          st_groups = st_match.groups()
4857
          if st_groups and st_groups[0] == "disk":
4858
            if st_groups[1] == "count":
4859
              val = len(instance.disks)
4860
            elif st_groups[1] == "sizes":
4861
              val = [disk.size for disk in instance.disks]
4862
            elif st_groups[1] == "size":
4863
              try:
4864
                val = instance.FindDisk(st_groups[2]).size
4865
              except errors.OpPrereqError:
4866
                val = None
4867
            else:
4868
              assert False, "Unhandled disk parameter"
4869
          elif st_groups[0] == "nic":
4870
            if st_groups[1] == "count":
4871
              val = len(instance.nics)
4872
            elif st_groups[1] == "macs":
4873
              val = [nic.mac for nic in instance.nics]
4874
            elif st_groups[1] == "ips":
4875
              val = [nic.ip for nic in instance.nics]
4876
            elif st_groups[1] == "modes":
4877
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4878
            elif st_groups[1] == "links":
4879
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4880
            elif st_groups[1] == "bridges":
4881
              val = []
4882
              for nicp in i_nicp:
4883
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4884
                  val.append(nicp[constants.NIC_LINK])
4885
                else:
4886
                  val.append(None)
4887
            else:
4888
              # index-based item
4889
              nic_idx = int(st_groups[2])
4890
              if nic_idx >= len(instance.nics):
4891
                val = None
4892
              else:
4893
                if st_groups[1] == "mac":
4894
                  val = instance.nics[nic_idx].mac
4895
                elif st_groups[1] == "ip":
4896
                  val = instance.nics[nic_idx].ip
4897
                elif st_groups[1] == "mode":
4898
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4899
                elif st_groups[1] == "link":
4900
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4901
                elif st_groups[1] == "bridge":
4902
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4903
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4904
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4905
                  else:
4906
                    val = None
4907
                else:
4908
                  assert False, "Unhandled NIC parameter"
4909
          else:
4910
            assert False, ("Declared but unhandled variable parameter '%s'" %
4911
                           field)
4912
        else:
4913
          assert False, "Declared but unhandled parameter '%s'" % field
4914
        iout.append(val)
4915
      output.append(iout)
4916

    
4917
    return output
4918

    
4919

    
4920
class LUFailoverInstance(LogicalUnit):
4921
  """Failover an instance.
4922

4923
  """
4924
  HPATH = "instance-failover"
4925
  HTYPE = constants.HTYPE_INSTANCE
4926
  _OP_REQP = ["instance_name", "ignore_consistency"]
4927
  REQ_BGL = False
4928

    
4929
  def CheckArguments(self):
4930
    """Check the arguments.
4931

4932
    """
4933
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4934
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4935

    
4936
  def ExpandNames(self):
4937
    self._ExpandAndLockInstance()
4938
    self.needed_locks[locking.LEVEL_NODE] = []
4939
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4940

    
4941
  def DeclareLocks(self, level):
4942
    if level == locking.LEVEL_NODE:
4943
      self._LockInstancesNodes()
4944

    
4945
  def BuildHooksEnv(self):
4946
    """Build hooks env.
4947

4948
    This runs on master, primary and secondary nodes of the instance.
4949

4950
    """
4951
    instance = self.instance
4952
    source_node = instance.primary_node
4953
    target_node = instance.secondary_nodes[0]
4954
    env = {
4955
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4956
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4957
      "OLD_PRIMARY": source_node,
4958
      "OLD_SECONDARY": target_node,
4959
      "NEW_PRIMARY": target_node,
4960
      "NEW_SECONDARY": source_node,
4961
      }
4962
    env.update(_BuildInstanceHookEnvByObject(self, instance))
4963
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4964
    nl_post = list(nl)
4965
    nl_post.append(source_node)
4966
    return env, nl, nl_post
4967

    
4968
  def CheckPrereq(self):
4969
    """Check prerequisites.
4970

4971
    This checks that the instance is in the cluster.
4972

4973
    """
4974
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4975
    assert self.instance is not None, \
4976
      "Cannot retrieve locked instance %s" % self.op.instance_name
4977

    
4978
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4979
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4980
      raise errors.OpPrereqError("Instance's disk layout is not"
4981
                                 " network mirrored, cannot failover.",
4982
                                 errors.ECODE_STATE)
4983

    
4984
    secondary_nodes = instance.secondary_nodes
4985
    if not secondary_nodes:
4986
      raise errors.ProgrammerError("no secondary node but using "
4987
                                   "a mirrored disk template")
4988

    
4989
    target_node = secondary_nodes[0]
4990
    _CheckNodeOnline(self, target_node)
4991
    _CheckNodeNotDrained(self, target_node)
4992
    if instance.admin_up:
4993
      # check memory requirements on the secondary node
4994
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4995
                           instance.name, bep[constants.BE_MEMORY],
4996
                           instance.hypervisor)
4997
    else:
4998
      self.LogInfo("Not checking memory on the secondary node as"
4999
                   " instance will not be started")
5000

    
5001
    # check bridge existance
5002
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5003

    
5004
  def Exec(self, feedback_fn):
5005
    """Failover an instance.
5006

5007
    The failover is done by shutting it down on its present node and
5008
    starting it on the secondary.
5009

5010
    """
5011
    instance = self.instance
5012

    
5013
    source_node = instance.primary_node
5014
    target_node = instance.secondary_nodes[0]
5015

    
5016
    if instance.admin_up:
5017
      feedback_fn("* checking disk consistency between source and target")
5018
      for dev in instance.disks:
5019
        # for drbd, these are drbd over lvm
5020
        if not _CheckDiskConsistency(self, dev, target_node, False):
5021
          if not self.op.ignore_consistency:
5022
            raise errors.OpExecError("Disk %s is degraded on target node,"
5023
                                     " aborting failover." % dev.iv_name)
5024
    else:
5025
      feedback_fn("* not checking disk consistency as instance is not running")
5026

    
5027
    feedback_fn("* shutting down instance on source node")
5028
    logging.info("Shutting down instance %s on node %s",
5029
                 instance.name, source_node)
5030

    
5031
    result = self.rpc.call_instance_shutdown(source_node, instance,
5032
                                             self.shutdown_timeout)
5033
    msg = result.fail_msg
5034
    if msg:
5035
      if self.op.ignore_consistency:
5036
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5037
                             " Proceeding anyway. Please make sure node"
5038
                             " %s is down. Error details: %s",
5039
                             instance.name, source_node, source_node, msg)
5040
      else:
5041
        raise errors.OpExecError("Could not shutdown instance %s on"
5042
                                 " node %s: %s" %
5043
                                 (instance.name, source_node, msg))
5044

    
5045
    feedback_fn("* deactivating the instance's disks on source node")
5046
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5047
      raise errors.OpExecError("Can't shut down the instance's disks.")
5048

    
5049
    instance.primary_node = target_node
5050
    # distribute new instance config to the other nodes
5051
    self.cfg.Update(instance, feedback_fn)
5052

    
5053
    # Only start the instance if it's marked as up
5054
    if instance.admin_up:
5055
      feedback_fn("* activating the instance's disks on target node")
5056
      logging.info("Starting instance %s on node %s",
5057
                   instance.name, target_node)
5058

    
5059
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5060
                                               ignore_secondaries=True)
5061
      if not disks_ok:
5062
        _ShutdownInstanceDisks(self, instance)
5063
        raise errors.OpExecError("Can't activate the instance's disks")
5064

    
5065
      feedback_fn("* starting the instance on the target node")
5066
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5067
      msg = result.fail_msg
5068
      if msg:
5069
        _ShutdownInstanceDisks(self, instance)
5070
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5071
                                 (instance.name, target_node, msg))
5072

    
5073

    
5074
class LUMigrateInstance(LogicalUnit):
5075
  """Migrate an instance.
5076

5077
  This is migration without shutting down, compared to the failover,
5078
  which is done with shutdown.
5079

5080
  """
5081
  HPATH = "instance-migrate"
5082
  HTYPE = constants.HTYPE_INSTANCE
5083
  _OP_REQP = ["instance_name", "live", "cleanup"]
5084

    
5085
  REQ_BGL = False
5086

    
5087
  def ExpandNames(self):
5088
    self._ExpandAndLockInstance()
5089

    
5090
    self.needed_locks[locking.LEVEL_NODE] = []
5091
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5092

    
5093
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5094
                                       self.op.live, self.op.cleanup)
5095
    self.tasklets = [self._migrater]
5096

    
5097
  def DeclareLocks(self, level):
5098
    if level == locking.LEVEL_NODE:
5099
      self._LockInstancesNodes()
5100

    
5101
  def BuildHooksEnv(self):
5102
    """Build hooks env.
5103

5104
    This runs on master, primary and secondary nodes of the instance.
5105

5106
    """
5107
    instance = self._migrater.instance
5108
    source_node = instance.primary_node
5109
    target_node = instance.secondary_nodes[0]
5110
    env = _BuildInstanceHookEnvByObject(self, instance)
5111
    env["MIGRATE_LIVE"] = self.op.live
5112
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5113
    env.update({
5114
        "OLD_PRIMARY": source_node,
5115
        "OLD_SECONDARY": target_node,
5116
        "NEW_PRIMARY": target_node,
5117
        "NEW_SECONDARY": source_node,
5118
        })
5119
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5120
    nl_post = list(nl)
5121
    nl_post.append(source_node)
5122
    return env, nl, nl_post
5123

    
5124

    
5125
class LUMoveInstance(LogicalUnit):
5126
  """Move an instance by data-copying.
5127

5128
  """
5129
  HPATH = "instance-move"
5130
  HTYPE = constants.HTYPE_INSTANCE
5131
  _OP_REQP = ["instance_name", "target_node"]
5132
  REQ_BGL = False
5133

    
5134
  def CheckArguments(self):
5135
    """Check the arguments.
5136

5137
    """
5138
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5139
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5140

    
5141
  def ExpandNames(self):
5142
    self._ExpandAndLockInstance()
5143
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5144
    self.op.target_node = target_node
5145
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5146
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5147

    
5148
  def DeclareLocks(self, level):
5149
    if level == locking.LEVEL_NODE:
5150
      self._LockInstancesNodes(primary_only=True)
5151

    
5152
  def BuildHooksEnv(self):
5153
    """Build hooks env.
5154

5155
    This runs on master, primary and secondary nodes of the instance.
5156

5157
    """
5158
    env = {
5159
      "TARGET_NODE": self.op.target_node,
5160
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5161
      }
5162
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5163
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5164
                                       self.op.target_node]
5165
    return env, nl, nl
5166

    
5167
  def CheckPrereq(self):
5168
    """Check prerequisites.
5169

5170
    This checks that the instance is in the cluster.
5171

5172
    """
5173
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5174
    assert self.instance is not None, \
5175
      "Cannot retrieve locked instance %s" % self.op.instance_name
5176

    
5177
    node = self.cfg.GetNodeInfo(self.op.target_node)
5178
    assert node is not None, \
5179
      "Cannot retrieve locked node %s" % self.op.target_node
5180

    
5181
    self.target_node = target_node = node.name
5182

    
5183
    if target_node == instance.primary_node:
5184
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5185
                                 (instance.name, target_node),
5186
                                 errors.ECODE_STATE)
5187

    
5188
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5189

    
5190
    for idx, dsk in enumerate(instance.disks):
5191
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5192
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5193
                                   " cannot copy" % idx, errors.ECODE_STATE)
5194

    
5195
    _CheckNodeOnline(self, target_node)
5196
    _CheckNodeNotDrained(self, target_node)
5197

    
5198
    if instance.admin_up:
5199
      # check memory requirements on the secondary node
5200
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5201
                           instance.name, bep[constants.BE_MEMORY],
5202
                           instance.hypervisor)
5203
    else:
5204
      self.LogInfo("Not checking memory on the secondary node as"
5205
                   " instance will not be started")
5206

    
5207
    # check bridge existance
5208
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5209

    
5210
  def Exec(self, feedback_fn):
5211
    """Move an instance.
5212

5213
    The move is done by shutting it down on its present node, copying
5214
    the data over (slow) and starting it on the new node.
5215

5216
    """
5217
    instance = self.instance
5218

    
5219
    source_node = instance.primary_node
5220
    target_node = self.target_node
5221

    
5222
    self.LogInfo("Shutting down instance %s on source node %s",
5223
                 instance.name, source_node)
5224

    
5225
    result = self.rpc.call_instance_shutdown(source_node, instance,
5226
                                             self.shutdown_timeout)
5227
    msg = result.fail_msg
5228
    if msg:
5229
      if self.op.ignore_consistency:
5230
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5231
                             " Proceeding anyway. Please make sure node"
5232
                             " %s is down. Error details: %s",
5233
                             instance.name, source_node, source_node, msg)
5234
      else:
5235
        raise errors.OpExecError("Could not shutdown instance %s on"
5236
                                 " node %s: %s" %
5237
                                 (instance.name, source_node, msg))
5238

    
5239
    # create the target disks
5240
    try:
5241
      _CreateDisks(self, instance, target_node=target_node)
5242
    except errors.OpExecError:
5243
      self.LogWarning("Device creation failed, reverting...")
5244
      try:
5245
        _RemoveDisks(self, instance, target_node=target_node)
5246
      finally:
5247
        self.cfg.ReleaseDRBDMinors(instance.name)
5248
        raise
5249

    
5250
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5251

    
5252
    errs = []
5253
    # activate, get path, copy the data over
5254
    for idx, disk in enumerate(instance.disks):
5255
      self.LogInfo("Copying data for disk %d", idx)
5256
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5257
                                               instance.name, True)
5258
      if result.fail_msg:
5259
        self.LogWarning("Can't assemble newly created disk %d: %s",
5260
                        idx, result.fail_msg)
5261
        errs.append(result.fail_msg)
5262
        break
5263
      dev_path = result.payload
5264
      result = self.rpc.call_blockdev_export(source_node, disk,
5265
                                             target_node, dev_path,
5266
                                             cluster_name)
5267
      if result.fail_msg:
5268
        self.LogWarning("Can't copy data over for disk %d: %s",
5269
                        idx, result.fail_msg)
5270
        errs.append(result.fail_msg)
5271
        break
5272

    
5273
    if errs:
5274
      self.LogWarning("Some disks failed to copy, aborting")
5275
      try:
5276
        _RemoveDisks(self, instance, target_node=target_node)
5277
      finally:
5278
        self.cfg.ReleaseDRBDMinors(instance.name)
5279
        raise errors.OpExecError("Errors during disk copy: %s" %
5280
                                 (",".join(errs),))
5281

    
5282
    instance.primary_node = target_node
5283
    self.cfg.Update(instance, feedback_fn)
5284

    
5285
    self.LogInfo("Removing the disks on the original node")
5286
    _RemoveDisks(self, instance, target_node=source_node)
5287

    
5288
    # Only start the instance if it's marked as up
5289
    if instance.admin_up:
5290
      self.LogInfo("Starting instance %s on node %s",
5291
                   instance.name, target_node)
5292

    
5293
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5294
                                           ignore_secondaries=True)
5295
      if not disks_ok:
5296
        _ShutdownInstanceDisks(self, instance)
5297
        raise errors.OpExecError("Can't activate the instance's disks")
5298

    
5299
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5300
      msg = result.fail_msg
5301
      if msg:
5302
        _ShutdownInstanceDisks(self, instance)
5303
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5304
                                 (instance.name, target_node, msg))
5305

    
5306

    
5307
class LUMigrateNode(LogicalUnit):
5308
  """Migrate all instances from a node.
5309

5310
  """
5311
  HPATH = "node-migrate"
5312
  HTYPE = constants.HTYPE_NODE
5313
  _OP_REQP = ["node_name", "live"]
5314
  REQ_BGL = False
5315

    
5316
  def ExpandNames(self):
5317
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5318

    
5319
    self.needed_locks = {
5320
      locking.LEVEL_NODE: [self.op.node_name],
5321
      }
5322

    
5323
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5324

    
5325
    # Create tasklets for migrating instances for all instances on this node
5326
    names = []
5327
    tasklets = []
5328

    
5329
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5330
      logging.debug("Migrating instance %s", inst.name)
5331
      names.append(inst.name)
5332

    
5333
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5334

    
5335
    self.tasklets = tasklets
5336

    
5337
    # Declare instance locks
5338
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5339

    
5340
  def DeclareLocks(self, level):
5341
    if level == locking.LEVEL_NODE:
5342
      self._LockInstancesNodes()
5343

    
5344
  def BuildHooksEnv(self):
5345
    """Build hooks env.
5346

5347
    This runs on the master, the primary and all the secondaries.
5348

5349
    """
5350
    env = {
5351
      "NODE_NAME": self.op.node_name,
5352
      }
5353

    
5354
    nl = [self.cfg.GetMasterNode()]
5355

    
5356
    return (env, nl, nl)
5357

    
5358

    
5359
class TLMigrateInstance(Tasklet):
5360
  def __init__(self, lu, instance_name, live, cleanup):
5361
    """Initializes this class.
5362

5363
    """
5364
    Tasklet.__init__(self, lu)
5365

    
5366
    # Parameters
5367
    self.instance_name = instance_name
5368
    self.live = live
5369
    self.cleanup = cleanup
5370

    
5371
  def CheckPrereq(self):
5372
    """Check prerequisites.
5373

5374
    This checks that the instance is in the cluster.
5375

5376
    """
5377
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5378
    instance = self.cfg.GetInstanceInfo(instance_name)
5379
    assert instance is not None
5380

    
5381
    if instance.disk_template != constants.DT_DRBD8:
5382
      raise errors.OpPrereqError("Instance's disk layout is not"
5383
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5384

    
5385
    secondary_nodes = instance.secondary_nodes
5386
    if not secondary_nodes:
5387
      raise errors.ConfigurationError("No secondary node but using"
5388
                                      " drbd8 disk template")
5389

    
5390
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5391

    
5392
    target_node = secondary_nodes[0]
5393
    # check memory requirements on the secondary node
5394
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5395
                         instance.name, i_be[constants.BE_MEMORY],
5396
                         instance.hypervisor)
5397

    
5398
    # check bridge existance
5399
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5400

    
5401
    if not self.cleanup:
5402
      _CheckNodeNotDrained(self, target_node)
5403
      result = self.rpc.call_instance_migratable(instance.primary_node,
5404
                                                 instance)
5405
      result.Raise("Can't migrate, please use failover",
5406
                   prereq=True, ecode=errors.ECODE_STATE)
5407

    
5408
    self.instance = instance
5409

    
5410
  def _WaitUntilSync(self):
5411
    """Poll with custom rpc for disk sync.
5412

5413
    This uses our own step-based rpc call.
5414

5415
    """
5416
    self.feedback_fn("* wait until resync is done")
5417
    all_done = False
5418
    while not all_done:
5419
      all_done = True
5420
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5421
                                            self.nodes_ip,
5422
                                            self.instance.disks)
5423
      min_percent = 100
5424
      for node, nres in result.items():
5425
        nres.Raise("Cannot resync disks on node %s" % node)
5426
        node_done, node_percent = nres.payload
5427
        all_done = all_done and node_done
5428
        if node_percent is not None:
5429
          min_percent = min(min_percent, node_percent)
5430
      if not all_done:
5431
        if min_percent < 100:
5432
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5433
        time.sleep(2)
5434

    
5435
  def _EnsureSecondary(self, node):
5436
    """Demote a node to secondary.
5437

5438
    """
5439
    self.feedback_fn("* switching node %s to secondary mode" % node)
5440

    
5441
    for dev in self.instance.disks:
5442
      self.cfg.SetDiskID(dev, node)
5443

    
5444
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5445
                                          self.instance.disks)
5446
    result.Raise("Cannot change disk to secondary on node %s" % node)
5447

    
5448
  def _GoStandalone(self):
5449
    """Disconnect from the network.
5450

5451
    """
5452
    self.feedback_fn("* changing into standalone mode")
5453
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5454
                                               self.instance.disks)
5455
    for node, nres in result.items():
5456
      nres.Raise("Cannot disconnect disks node %s" % node)
5457

    
5458
  def _GoReconnect(self, multimaster):
5459
    """Reconnect to the network.
5460

5461
    """
5462
    if multimaster:
5463
      msg = "dual-master"
5464
    else:
5465
      msg = "single-master"
5466
    self.feedback_fn("* changing disks into %s mode" % msg)
5467
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5468
                                           self.instance.disks,
5469
                                           self.instance.name, multimaster)
5470
    for node, nres in result.items():
5471
      nres.Raise("Cannot change disks config on node %s" % node)
5472

    
5473
  def _ExecCleanup(self):
5474
    """Try to cleanup after a failed migration.
5475

5476
    The cleanup is done by:
5477
      - check that the instance is running only on one node
5478
        (and update the config if needed)
5479
      - change disks on its secondary node to secondary
5480
      - wait until disks are fully synchronized
5481
      - disconnect from the network
5482
      - change disks into single-master mode
5483
      - wait again until disks are fully synchronized
5484

5485
    """
5486
    instance = self.instance
5487
    target_node = self.target_node
5488
    source_node = self.source_node
5489

    
5490
    # check running on only one node
5491
    self.feedback_fn("* checking where the instance actually runs"
5492
                     " (if this hangs, the hypervisor might be in"
5493
                     " a bad state)")
5494
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5495
    for node, result in ins_l.items():
5496
      result.Raise("Can't contact node %s" % node)
5497

    
5498
    runningon_source = instance.name in ins_l[source_node].payload
5499
    runningon_target = instance.name in ins_l[target_node].payload
5500

    
5501
    if runningon_source and runningon_target:
5502
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5503
                               " or the hypervisor is confused. You will have"
5504
                               " to ensure manually that it runs only on one"
5505
                               " and restart this operation.")
5506

    
5507
    if not (runningon_source or runningon_target):
5508
      raise errors.OpExecError("Instance does not seem to be running at all."
5509
                               " In this case, it's safer to repair by"
5510
                               " running 'gnt-instance stop' to ensure disk"
5511
                               " shutdown, and then restarting it.")
5512

    
5513
    if runningon_target:
5514
      # the migration has actually succeeded, we need to update the config
5515
      self.feedback_fn("* instance running on secondary node (%s),"
5516
                       " updating config" % target_node)
5517
      instance.primary_node = target_node
5518
      self.cfg.Update(instance, self.feedback_fn)
5519
      demoted_node = source_node
5520
    else:
5521
      self.feedback_fn("* instance confirmed to be running on its"
5522
                       " primary node (%s)" % source_node)
5523
      demoted_node = target_node
5524

    
5525
    self._EnsureSecondary(demoted_node)
5526
    try:
5527
      self._WaitUntilSync()
5528
    except errors.OpExecError:
5529
      # we ignore here errors, since if the device is standalone, it
5530
      # won't be able to sync
5531
      pass
5532
    self._GoStandalone()
5533
    self._GoReconnect(False)
5534
    self._WaitUntilSync()
5535

    
5536
    self.feedback_fn("* done")
5537

    
5538
  def _RevertDiskStatus(self):
5539
    """Try to revert the disk status after a failed migration.
5540

5541
    """
5542
    target_node = self.target_node
5543
    try:
5544
      self._EnsureSecondary(target_node)
5545
      self._GoStandalone()
5546
      self._GoReconnect(False)
5547
      self._WaitUntilSync()
5548
    except errors.OpExecError, err:
5549
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5550
                         " drives: error '%s'\n"
5551
                         "Please look and recover the instance status" %
5552
                         str(err))
5553

    
5554
  def _AbortMigration(self):
5555
    """Call the hypervisor code to abort a started migration.
5556

5557
    """
5558
    instance = self.instance
5559
    target_node = self.target_node
5560
    migration_info = self.migration_info
5561

    
5562
    abort_result = self.rpc.call_finalize_migration(target_node,
5563
                                                    instance,
5564
                                                    migration_info,
5565
                                                    False)
5566
    abort_msg = abort_result.fail_msg
5567
    if abort_msg:
5568
      logging.error("Aborting migration failed on target node %s: %s",
5569
                    target_node, abort_msg)
5570
      # Don't raise an exception here, as we stil have to try to revert the
5571
      # disk status, even if this step failed.
5572

    
5573
  def _ExecMigration(self):
5574
    """Migrate an instance.
5575

5576
    The migrate is done by:
5577
      - change the disks into dual-master mode
5578
      - wait until disks are fully synchronized again
5579
      - migrate the instance
5580
      - change disks on the new secondary node (the old primary) to secondary
5581
      - wait until disks are fully synchronized
5582
      - change disks into single-master mode
5583

5584
    """
5585
    instance = self.instance
5586
    target_node = self.target_node
5587
    source_node = self.source_node
5588

    
5589
    self.feedback_fn("* checking disk consistency between source and target")
5590
    for dev in instance.disks:
5591
      if not _CheckDiskConsistency(self, dev, target_node, False):
5592
        raise errors.OpExecError("Disk %s is degraded or not fully"
5593
                                 " synchronized on target node,"
5594
                                 " aborting migrate." % dev.iv_name)
5595

    
5596
    # First get the migration information from the remote node
5597
    result = self.rpc.call_migration_info(source_node, instance)
5598
    msg = result.fail_msg
5599
    if msg:
5600
      log_err = ("Failed fetching source migration information from %s: %s" %
5601
                 (source_node, msg))
5602
      logging.error(log_err)
5603
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


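# Illustrative sketch (added for clarity, not part of the original module;
# node names and addresses are made up): nodes_ip maps each of the two nodes
# involved in the migration to its secondary (replication) IP, which is the
# address handed to the accept/migrate RPCs above as the migration target.
_EXAMPLE_NODES_IP = {
  "node1.example.com": "192.0.2.1",
  "node2.example.com": "192.0.2.2",
  }

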
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


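# Illustrative sketch (an addition for clarity, not part of the original
# module): _CreateBlockDev visits a device's children before the device
# itself, so for example the backing LVs of a DRBD8 disk exist before the
# DRBD layer is created on top of them. The hypothetical helper below
# mirrors that traversal order for any object exposing a 'children' list.
def _ExampleCreationOrder(device):
  """Return devices in the bottom-up order _CreateBlockDev would create them.

  """
  order = []
  for child in getattr(device, "children", None) or []:
    order.extend(_ExampleCreationOrder(child))
  order.append(device)
  return order

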
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate logical volume names for the given instance,
  one per extension in C{exts}.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


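# Illustrative sketch (added for clarity, not part of the original module):
# the DRBD8 disk returned above is a small tree -- the DRBD device on top,
# with a data LV of the requested size and a fixed 128 MB metadata LV as its
# two children. A hypothetical helper to pull the LV names back out of such
# a tree could look like this.
def _ExampleDrbd8LvNames(drbd_dev):
  """Return the (data, meta) LV names of a DRBD8 disk built above.

  """
  dev_data, dev_meta = drbd_dev.children
  # the logical_id of an LV is (volume group, LV name)
  return (dev_data.logical_id[1], dev_meta.logical_id[1])

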
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


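# Illustrative sketch (added for clarity, not part of the original module):
# disk_info is a list of dicts with at least "size" (in MiB) and "mode"
# keys, and base_index offsets the "disk/N" iv_names -- e.g. when disks are
# appended to an instance that already has two, base_index would be 2 and
# the new devices become "disk/2", "disk/3", ... The hypothetical helper
# below shows the iv_names that would be generated for such an input.
def _ExampleIvNames(disk_info, base_index):
  """Return the iv_names _GenerateDiskTemplate would assign.

  """
  return ["disk/%d" % (base_index + idx) for idx in range(len(disk_info))]

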
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


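# Illustrative example (added for clarity; the instance name is made up):
# the metadata text is simply "originstname+" followed by the instance name,
# e.g. "originstname+web1.example.com", so a stray LV's tag can be mapped
# back to the owning instance with a hypothetical helper like this one.
def _ExampleOwnerFromInfoText(info_text):
  """Return the instance name encoded in an info text built above.

  """
  prefix = "originstname+"
  if info_text.startswith(prefix):
    return info_text[len(prefix):]
  return None

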
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


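# Illustrative sketch (added for clarity, not part of the original module):
# in the loop above only the chosen primary (or override target) node gets
# force_create/force_open set to True, so the secondaries only create what
# CreateOnSecondary() demands. A hypothetical helper making that explicit:
def _ExampleForceFlags(all_nodes, pnode):
  """Return {node: force flag} as used by the _CreateDisks loop above.

  """
  return dict((node, node == pnode) for node in all_nodes)

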
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


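# Worked example (illustrative, not part of the original module): for a
# hypothetical instance with two DRBD8 disks of 1024 MiB and 2048 MiB, the
# required space in the volume group is (1024 + 128) + (2048 + 128) = 3328
# MiB, since every DRBD8 disk also needs its 128 MiB metadata volume.
def _ExampleDrbd8Requirement():
  """Return the VG space needed for two example DRBD8 disks.

  """
  disks = [{"size": 1024}, {"size": 2048}]
  return sum(d["size"] + 128 for d in disks)  # 3328

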
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
6013
  """Create an instance.
6014

6015
  """
6016
  HPATH = "instance-add"
6017
  HTYPE = constants.HTYPE_INSTANCE
6018
  _OP_REQP = ["instance_name", "disks",
6019
              "mode", "start",
6020
              "wait_for_sync", "ip_check", "nics",
6021
              "hvparams", "beparams"]
6022
  REQ_BGL = False
6023

    
6024
  def CheckArguments(self):
6025
    """Check arguments.
6026

6027
    """
6028
    # set optional parameters to none if they don't exist
6029
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
6030
                 "disk_template", "identify_defaults"]:
6031
      if not hasattr(self.op, attr):
6032
        setattr(self.op, attr, None)
6033

    
6034
    # do not require name_check to ease forward/backward compatibility
6035
    # for tools
6036
    if not hasattr(self.op, "name_check"):
6037
      self.op.name_check = True
6038
    if not hasattr(self.op, "no_install"):
6039
      self.op.no_install = False
6040
    if self.op.no_install and self.op.start:
6041
      self.LogInfo("No-installation mode selected, disabling startup")
6042
      self.op.start = False
6043
    # validate/normalize the instance name
6044
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6045
    if self.op.ip_check and not self.op.name_check:
6046
      # TODO: make the ip check more flexible and not depend on the name check
6047
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6048
                                 errors.ECODE_INVAL)
6049
    # check disk information: either all adopt, or no adopt
6050
    has_adopt = has_no_adopt = False
6051
    for disk in self.op.disks:
6052
      if "adopt" in disk:
6053
        has_adopt = True
6054
      else:
6055
        has_no_adopt = True
6056
    if has_adopt and has_no_adopt:
6057
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6058
                                 errors.ECODE_INVAL)
6059
    if has_adopt:
6060
      if self.op.disk_template != constants.DT_PLAIN:
6061
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6062
                                   " 'plain' disk template",
6063
                                   errors.ECODE_INVAL)
6064
      if self.op.iallocator is not None:
6065
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6066
                                   " iallocator script", errors.ECODE_INVAL)
6067
      if self.op.mode == constants.INSTANCE_IMPORT:
6068
        raise errors.OpPrereqError("Disk adoption not allowed for"
6069
                                   " instance import", errors.ECODE_INVAL)
6070

    
6071
    self.adopt_disks = has_adopt
6072

    
6073
    # verify creation mode
6074
    if self.op.mode not in (constants.INSTANCE_CREATE,
6075
                            constants.INSTANCE_IMPORT):
6076
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6077
                                 self.op.mode, errors.ECODE_INVAL)
6078

    
6079
    # instance name verification
6080
    if self.op.name_check:
6081
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6082
      self.op.instance_name = self.hostname1.name
6083
      # used in CheckPrereq for ip ping check
6084
      self.check_ip = self.hostname1.ip
6085
    else:
6086
      self.check_ip = None
6087

    
6088
    # file storage checks
6089
    if (self.op.file_driver and
6090
        not self.op.file_driver in constants.FILE_DRIVER):
6091
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6092
                                 self.op.file_driver, errors.ECODE_INVAL)
6093

    
6094
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6095
      raise errors.OpPrereqError("File storage directory path not absolute",
6096
                                 errors.ECODE_INVAL)
6097

    
6098
    ### Node/iallocator related checks
6099
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6100
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6101
                                 " node must be given",
6102
                                 errors.ECODE_INVAL)
6103

    
6104
    if self.op.mode == constants.INSTANCE_IMPORT:
6105
      # On import force_variant must be True, because if we forced it at
6106
      # initial install, our only chance when importing it back is that it
6107
      # works again!
6108
      self.op.force_variant = True
6109

    
6110
      if self.op.no_install:
6111
        self.LogInfo("No-installation mode has no effect during import")
6112

    
6113
    else: # INSTANCE_CREATE
6114
      if getattr(self.op, "os_type", None) is None:
6115
        raise errors.OpPrereqError("No guest OS specified",
6116
                                   errors.ECODE_INVAL)
6117
      self.op.force_variant = getattr(self.op, "force_variant", False)
6118
      if self.op.disk_template is None:
6119
        raise errors.OpPrereqError("No disk template specified",
6120
                                   errors.ECODE_INVAL)
6121

    
6122
  def ExpandNames(self):
6123
    """ExpandNames for CreateInstance.
6124

6125
    Figure out the right locks for instance creation.
6126

6127
    """
6128
    self.needed_locks = {}
6129

    
6130
    instance_name = self.op.instance_name
6131
    # this is just a preventive check, but someone might still add this
6132
    # instance in the meantime, and creation will fail at lock-add time
6133
    if instance_name in self.cfg.GetInstanceList():
6134
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6135
                                 instance_name, errors.ECODE_EXISTS)
6136

    
6137
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6138

    
6139
    if self.op.iallocator:
6140
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6141
    else:
6142
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6143
      nodelist = [self.op.pnode]
6144
      if self.op.snode is not None:
6145
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6146
        nodelist.append(self.op.snode)
6147
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6148

    
6149
    # in case of import lock the source node too
6150
    if self.op.mode == constants.INSTANCE_IMPORT:
6151
      src_node = getattr(self.op, "src_node", None)
6152
      src_path = getattr(self.op, "src_path", None)
6153

    
6154
      if src_path is None:
6155
        self.op.src_path = src_path = self.op.instance_name
6156

    
6157
      if src_node is None:
6158
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6159
        self.op.src_node = None
6160
        if os.path.isabs(src_path):
6161
          raise errors.OpPrereqError("Importing an instance from an absolute"
6162
                                     " path requires a source node option.",
6163
                                     errors.ECODE_INVAL)
6164
      else:
6165
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6166
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6167
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6168
        if not os.path.isabs(src_path):
6169
          self.op.src_path = src_path = \
6170
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6171

    
6172
  def _RunAllocator(self):
6173
    """Run the allocator based on input opcode.
6174

6175
    """
6176
    nics = [n.ToDict() for n in self.nics]
6177
    ial = IAllocator(self.cfg, self.rpc,
6178
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6179
                     name=self.op.instance_name,
6180
                     disk_template=self.op.disk_template,
6181
                     tags=[],
6182
                     os=self.op.os_type,
6183
                     vcpus=self.be_full[constants.BE_VCPUS],
6184
                     mem_size=self.be_full[constants.BE_MEMORY],
6185
                     disks=self.disks,
6186
                     nics=nics,
6187
                     hypervisor=self.op.hypervisor,
6188
                     )
6189

    
6190
    ial.Run(self.op.iallocator)
6191

    
6192
    if not ial.success:
6193
      raise errors.OpPrereqError("Can't compute nodes using"
6194
                                 " iallocator '%s': %s" %
6195
                                 (self.op.iallocator, ial.info),
6196
                                 errors.ECODE_NORES)
6197
    if len(ial.result) != ial.required_nodes:
6198
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6199
                                 " of nodes (%s), required %s" %
6200
                                 (self.op.iallocator, len(ial.result),
6201
                                  ial.required_nodes), errors.ECODE_FAULT)
6202
    self.op.pnode = ial.result[0]
6203
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6204
                 self.op.instance_name, self.op.iallocator,
6205
                 utils.CommaJoin(ial.result))
6206
    if ial.required_nodes == 2:
6207
      self.op.snode = ial.result[1]
6208

    
6209
  def BuildHooksEnv(self):
6210
    """Build hooks env.
6211

6212
    This runs on master, primary and secondary nodes of the instance.
6213

6214
    """
6215
    env = {
6216
      "ADD_MODE": self.op.mode,
6217
      }
6218
    if self.op.mode == constants.INSTANCE_IMPORT:
6219
      env["SRC_NODE"] = self.op.src_node
6220
      env["SRC_PATH"] = self.op.src_path
6221
      env["SRC_IMAGES"] = self.src_images
6222

    
6223
    env.update(_BuildInstanceHookEnv(
6224
      name=self.op.instance_name,
6225
      primary_node=self.op.pnode,
6226
      secondary_nodes=self.secondaries,
6227
      status=self.op.start,
6228
      os_type=self.op.os_type,
6229
      memory=self.be_full[constants.BE_MEMORY],
6230
      vcpus=self.be_full[constants.BE_VCPUS],
6231
      nics=_NICListToTuple(self, self.nics),
6232
      disk_template=self.op.disk_template,
6233
      disks=[(d["size"], d["mode"]) for d in self.disks],
6234
      bep=self.be_full,
6235
      hvp=self.hv_full,
6236
      hypervisor_name=self.op.hypervisor,
6237
    ))
6238

    
6239
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6240
          self.secondaries)
6241
    return env, nl, nl
6242

    
6243
  def _ReadExportInfo(self):
6244
    """Reads the export information from disk.
6245

6246
    It will override the opcode source node and path with the actual
6247
    information, if these two were not specified before.
6248

6249
    @return: the export information
6250

6251
    """
6252
    assert self.op.mode == constants.INSTANCE_IMPORT
6253

    
6254
    src_node = self.op.src_node
6255
    src_path = self.op.src_path
6256

    
6257
    if src_node is None:
6258
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6259
      exp_list = self.rpc.call_export_list(locked_nodes)
6260
      found = False
6261
      for node in exp_list:
6262
        if exp_list[node].fail_msg:
6263
          continue
6264
        if src_path in exp_list[node].payload:
6265
          found = True
6266
          self.op.src_node = src_node = node
6267
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6268
                                                       src_path)
6269
          break
6270
      if not found:
6271
        raise errors.OpPrereqError("No export found for relative path %s" %
6272
                                    src_path, errors.ECODE_INVAL)
6273

    
6274
    _CheckNodeOnline(self, src_node)
6275
    result = self.rpc.call_export_info(src_node, src_path)
6276
    result.Raise("No export or invalid export found in dir %s" % src_path)
6277

    
6278
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6279
    if not export_info.has_section(constants.INISECT_EXP):
6280
      raise errors.ProgrammerError("Corrupted export config",
6281
                                   errors.ECODE_ENVIRON)
6282

    
6283
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6284
    if (int(ei_version) != constants.EXPORT_VERSION):
6285
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6286
                                 (ei_version, constants.EXPORT_VERSION),
6287
                                 errors.ECODE_ENVIRON)
6288
    return export_info
6289

    
6290
  def _ReadExportParams(self, einfo):
6291
    """Use export parameters as defaults.
6292

6293
    In case the opcode doesn't specify (as in override) some instance
6294
    parameters, then try to use them from the export information, if
6295
    that declares them.
6296

6297
    """
6298
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6299

    
6300
    if self.op.disk_template is None:
6301
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6302
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6303
                                          "disk_template")
6304
      else:
6305
        raise errors.OpPrereqError("No disk template specified and the export"
6306
                                   " is missing the disk_template information",
6307
                                   errors.ECODE_INVAL)
6308

    
6309
    if not self.op.disks:
6310
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6311
        disks = []
6312
        # TODO: import the disk iv_name too
6313
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6314
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6315
          disks.append({"size": disk_sz})
6316
        self.op.disks = disks
6317
      else:
6318
        raise errors.OpPrereqError("No disk info specified and the export"
6319
                                   " is missing the disk information",
6320
                                   errors.ECODE_INVAL)
6321

    
6322
    if (not self.op.nics and
6323
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6324
      nics = []
6325
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6326
        ndict = {}
6327
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6328
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6329
          ndict[name] = v
6330
        nics.append(ndict)
6331
      self.op.nics = nics
6332

    
6333
    if (self.op.hypervisor is None and
6334
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6335
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6336
    if einfo.has_section(constants.INISECT_HYP):
6337
      # use the export parameters but do not override the ones
6338
      # specified by the user
6339
      for name, value in einfo.items(constants.INISECT_HYP):
6340
        if name not in self.op.hvparams:
6341
          self.op.hvparams[name] = value
6342

    
6343
    if einfo.has_section(constants.INISECT_BEP):
6344
      # use the parameters, without overriding
6345
      for name, value in einfo.items(constants.INISECT_BEP):
6346
        if name not in self.op.beparams:
6347
          self.op.beparams[name] = value
6348
    else:
6349
      # try to read the parameters old style, from the main section
6350
      for name in constants.BES_PARAMETERS:
6351
        if (name not in self.op.beparams and
6352
            einfo.has_option(constants.INISECT_INS, name)):
6353
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6354

    
6355
  def _RevertToDefaults(self, cluster):
6356
    """Revert the instance parameters to the default values.
6357

6358
    """
6359
    # hvparams
6360
    hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6361
    for name in self.op.hvparams.keys():
6362
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6363
        del self.op.hvparams[name]
6364
    # beparams
6365
    be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6366
    for name in self.op.beparams.keys():
6367
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6368
        del self.op.beparams[name]
6369
    # nic params
6370
    nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6371
    for nic in self.op.nics:
6372
      for name in constants.NICS_PARAMETERS:
6373
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6374
          del nic[name]
6375

    
6376
  def CheckPrereq(self):
6377
    """Check prerequisites.
6378

6379
    """
6380
    if self.op.mode == constants.INSTANCE_IMPORT:
6381
      export_info = self._ReadExportInfo()
6382
      self._ReadExportParams(export_info)
6383

    
6384
    _CheckDiskTemplate(self.op.disk_template)
6385

    
6386
    if (not self.cfg.GetVGName() and
6387
        self.op.disk_template not in constants.DTS_NOT_LVM):
6388
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6389
                                 " instances", errors.ECODE_STATE)
6390

    
6391
    if self.op.hypervisor is None:
6392
      self.op.hypervisor = self.cfg.GetHypervisorType()
6393

    
6394
    cluster = self.cfg.GetClusterInfo()
6395
    enabled_hvs = cluster.enabled_hypervisors
6396
    if self.op.hypervisor not in enabled_hvs:
6397
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6398
                                 " cluster (%s)" % (self.op.hypervisor,
6399
                                  ",".join(enabled_hvs)),
6400
                                 errors.ECODE_STATE)
6401

    
6402
    # check hypervisor parameter syntax (locally)
6403
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6404
    filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6405
                                                        self.op.os_type),
6406
                                  self.op.hvparams)
6407
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6408
    hv_type.CheckParameterSyntax(filled_hvp)
6409
    self.hv_full = filled_hvp
6410
    # check that we don't specify global parameters on an instance
6411
    _CheckGlobalHvParams(self.op.hvparams)
6412

    
6413
    # fill and remember the beparams dict
6414
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6415
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6416
                                    self.op.beparams)
6417

    
6418
    # now that hvp/bep are in final format, let's reset to defaults,
6419
    # if told to do so
6420
    if self.op.identify_defaults:
6421
      self._RevertToDefaults(cluster)
6422

    
6423
    # NIC buildup
6424
    self.nics = []
6425
    for idx, nic in enumerate(self.op.nics):
6426
      nic_mode_req = nic.get("mode", None)
6427
      nic_mode = nic_mode_req
6428
      if nic_mode is None:
6429
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6430

    
6431
      # in routed mode, for the first nic, the default ip is 'auto'
6432
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6433
        default_ip_mode = constants.VALUE_AUTO
6434
      else:
6435
        default_ip_mode = constants.VALUE_NONE
6436

    
6437
      # ip validity checks
6438
      ip = nic.get("ip", default_ip_mode)
6439
      if ip is None or ip.lower() == constants.VALUE_NONE:
6440
        nic_ip = None
6441
      elif ip.lower() == constants.VALUE_AUTO:
6442
        if not self.op.name_check:
6443
          raise errors.OpPrereqError("IP address set to auto but name checks"
6444
                                     " have been skipped. Aborting.",
6445
                                     errors.ECODE_INVAL)
6446
        nic_ip = self.hostname1.ip
6447
      else:
6448
        if not utils.IsValidIP(ip):
6449
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6450
                                     " like a valid IP" % ip,
6451
                                     errors.ECODE_INVAL)
6452
        nic_ip = ip
6453

    
6454
      # TODO: check the ip address for uniqueness
6455
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6456
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6457
                                   errors.ECODE_INVAL)
6458

    
6459
      # MAC address verification
6460
      mac = nic.get("mac", constants.VALUE_AUTO)
6461
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6462
        mac = utils.NormalizeAndValidateMac(mac)
6463

    
6464
        try:
6465
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6466
        except errors.ReservationError:
6467
          raise errors.OpPrereqError("MAC address %s already in use"
6468
                                     " in cluster" % mac,
6469
                                     errors.ECODE_NOTUNIQUE)
6470

    
6471
      # bridge verification
6472
      bridge = nic.get("bridge", None)
6473
      link = nic.get("link", None)
6474
      if bridge and link:
6475
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6476
                                   " at the same time", errors.ECODE_INVAL)
6477
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6478
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6479
                                   errors.ECODE_INVAL)
6480
      elif bridge:
6481
        link = bridge
6482

    
6483
      nicparams = {}
6484
      if nic_mode_req:
6485
        nicparams[constants.NIC_MODE] = nic_mode_req
6486
      if link:
6487
        nicparams[constants.NIC_LINK] = link
6488

    
6489
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6490
                                      nicparams)
6491
      objects.NIC.CheckParameterSyntax(check_params)
6492
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6493

    
6494
    # disk checks/pre-build
6495
    self.disks = []
6496
    for disk in self.op.disks:
6497
      mode = disk.get("mode", constants.DISK_RDWR)
6498
      if mode not in constants.DISK_ACCESS_SET:
6499
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6500
                                   mode, errors.ECODE_INVAL)
6501
      size = disk.get("size", None)
6502
      if size is None:
6503
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6504
      try:
6505
        size = int(size)
6506
      except (TypeError, ValueError):
6507
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6508
                                   errors.ECODE_INVAL)
6509
      new_disk = {"size": size, "mode": mode}
6510
      if "adopt" in disk:
6511
        new_disk["adopt"] = disk["adopt"]
6512
      self.disks.append(new_disk)
6513

    
6514
    if self.op.mode == constants.INSTANCE_IMPORT:
6515

    
6516
      # Check that the new instance doesn't have fewer disks than the export
6517
      instance_disks = len(self.disks)
6518
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6519
      if instance_disks < export_disks:
6520
        raise errors.OpPrereqError("Not enough disks to import."
6521
                                   " (instance: %d, export: %d)" %
6522
                                   (instance_disks, export_disks),
6523
                                   errors.ECODE_INVAL)
6524

    
6525
      disk_images = []
6526
      for idx in range(export_disks):
6527
        option = 'disk%d_dump' % idx
6528
        if export_info.has_option(constants.INISECT_INS, option):
6529
          # FIXME: are the old os-es, disk sizes, etc. useful?
6530
          export_name = export_info.get(constants.INISECT_INS, option)
6531
          image = utils.PathJoin(self.op.src_path, export_name)
6532
          disk_images.append(image)
6533
        else:
6534
          disk_images.append(False)
6535

    
6536
      self.src_images = disk_images
6537

    
6538
      old_name = export_info.get(constants.INISECT_INS, 'name')
6539
      try:
6540
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6541
      except (TypeError, ValueError), err:
6542
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6543
                                   " an integer: %s" % str(err),
6544
                                   errors.ECODE_STATE)
6545
      if self.op.instance_name == old_name:
6546
        for idx, nic in enumerate(self.nics):
6547
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6548
            nic_mac_ini = 'nic%d_mac' % idx
6549
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6550

    
6551
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6552

    
6553
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6554
    if self.op.ip_check:
6555
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6556
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6557
                                   (self.check_ip, self.op.instance_name),
6558
                                   errors.ECODE_NOTUNIQUE)
6559

    
6560
    #### mac address generation
6561
    # By generating here the mac address both the allocator and the hooks get
6562
    # the real final mac address rather than the 'auto' or 'generate' value.
6563
    # There is a race condition between the generation and the instance object
6564
    # creation, which means that we know the mac is valid now, but we're not
6565
    # sure it will be when we actually add the instance. If things go bad
6566
    # adding the instance will abort because of a duplicate mac, and the
6567
    # creation job will fail.
6568
    for nic in self.nics:
6569
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6570
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6571

    
6572
    #### allocator run
6573

    
6574
    if self.op.iallocator is not None:
6575
      self._RunAllocator()
6576

    
6577
    #### node related checks
6578

    
6579
    # check primary node
6580
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6581
    assert self.pnode is not None, \
6582
      "Cannot retrieve locked node %s" % self.op.pnode
6583
    if pnode.offline:
6584
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6585
                                 pnode.name, errors.ECODE_STATE)
6586
    if pnode.drained:
6587
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6588
                                 pnode.name, errors.ECODE_STATE)
6589

    
6590
    self.secondaries = []
6591

    
6592
    # mirror node verification
6593
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6594
      if self.op.snode is None:
6595
        raise errors.OpPrereqError("The networked disk templates need"
6596
                                   " a mirror node", errors.ECODE_INVAL)
6597
      if self.op.snode == pnode.name:
6598
        raise errors.OpPrereqError("The secondary node cannot be the"
6599
                                   " primary node.", errors.ECODE_INVAL)
6600
      _CheckNodeOnline(self, self.op.snode)
6601
      _CheckNodeNotDrained(self, self.op.snode)
6602
      self.secondaries.append(self.op.snode)
6603

    
6604
    nodenames = [pnode.name] + self.secondaries
6605

    
6606
    req_size = _ComputeDiskSize(self.op.disk_template,
6607
                                self.disks)
6608

    
6609
    # Check lv size requirements, if not adopting
6610
    if req_size is not None and not self.adopt_disks:
6611
      _CheckNodesFreeDisk(self, nodenames, req_size)
6612

    
6613
    if self.adopt_disks: # instead, we must check the adoption data
6614
      all_lvs = set([i["adopt"] for i in self.disks])
6615
      if len(all_lvs) != len(self.disks):
6616
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6617
                                   errors.ECODE_INVAL)
6618
      for lv_name in all_lvs:
6619
        try:
6620
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6621
        except errors.ReservationError:
6622
          raise errors.OpPrereqError("LV named %s used by another instance" %
6623
                                     lv_name, errors.ECODE_NOTUNIQUE)
6624

    
6625
      node_lvs = self.rpc.call_lv_list([pnode.name],
6626
                                       self.cfg.GetVGName())[pnode.name]
6627
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6628
      node_lvs = node_lvs.payload
6629
      delta = all_lvs.difference(node_lvs.keys())
6630
      if delta:
6631
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6632
                                   utils.CommaJoin(delta),
6633
                                   errors.ECODE_INVAL)
6634
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6635
      if online_lvs:
6636
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6637
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6638
                                   errors.ECODE_STATE)
6639
      # update the size of disk based on what is found
6640
      for dsk in self.disks:
6641
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6642

    
6643
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6644

    
6645
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6646

    
6647
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6648

    
6649
    # memory check on primary node
6650
    if self.op.start:
6651
      _CheckNodeFreeMemory(self, self.pnode.name,
6652
                           "creating instance %s" % self.op.instance_name,
6653
                           self.be_full[constants.BE_MEMORY],
6654
                           self.op.hypervisor)
6655

    
6656
    self.dry_run_result = list(nodenames)
6657

    
6658
  def Exec(self, feedback_fn):
6659
    """Create and add the instance to the cluster.
6660

6661
    """
6662
    instance = self.op.instance_name
6663
    pnode_name = self.pnode.name
6664

    
6665
    ht_kind = self.op.hypervisor
6666
    if ht_kind in constants.HTS_REQ_PORT:
6667
      network_port = self.cfg.AllocatePort()
6668
    else:
6669
      network_port = None
6670

    
6671
    if constants.ENABLE_FILE_STORAGE:
6672
      # this is needed because os.path.join does not accept None arguments
6673
      if self.op.file_storage_dir is None:
6674
        string_file_storage_dir = ""
6675
      else:
6676
        string_file_storage_dir = self.op.file_storage_dir
6677

    
6678
      # build the full file storage dir path
6679
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6680
                                        string_file_storage_dir, instance)
6681
    else:
6682
      file_storage_dir = ""
6683

    
6684

    
6685
    disks = _GenerateDiskTemplate(self,
6686
                                  self.op.disk_template,
6687
                                  instance, pnode_name,
6688
                                  self.secondaries,
6689
                                  self.disks,
6690
                                  file_storage_dir,
6691
                                  self.op.file_driver,
6692
                                  0)
6693

    
6694
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6695
                            primary_node=pnode_name,
6696
                            nics=self.nics, disks=disks,
6697
                            disk_template=self.op.disk_template,
6698
                            admin_up=False,
6699
                            network_port=network_port,
6700
                            beparams=self.op.beparams,
6701
                            hvparams=self.op.hvparams,
6702
                            hypervisor=self.op.hypervisor,
6703
                            )
6704

    
6705
    if self.adopt_disks:
6706
      # rename LVs to the newly-generated names; we need to construct
6707
      # 'fake' LV disks with the old data, plus the new unique_id
6708
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6709
      rename_to = []
6710
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6711
        rename_to.append(t_dsk.logical_id)
6712
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6713
        self.cfg.SetDiskID(t_dsk, pnode_name)
6714
      result = self.rpc.call_blockdev_rename(pnode_name,
6715
                                             zip(tmp_disks, rename_to))
6716
      result.Raise("Failed to rename adopted LVs")
6717
    else:
6718
      feedback_fn("* creating instance disks...")
6719
      try:
6720
        _CreateDisks(self, iobj)
6721
      except errors.OpExecError:
6722
        self.LogWarning("Device creation failed, reverting...")
6723
        try:
6724
          _RemoveDisks(self, iobj)
6725
        finally:
6726
          self.cfg.ReleaseDRBDMinors(instance)
6727
          raise
6728

    
6729
    feedback_fn("adding instance %s to cluster config" % instance)
6730

    
6731
    self.cfg.AddInstance(iobj, self.proc.GetECId())
6732

    
6733
    # Declare that we don't want to remove the instance lock anymore, as we've
6734
    # added the instance to the config
6735
    del self.remove_locks[locking.LEVEL_INSTANCE]
6736
    # Unlock all the nodes
6737
    if self.op.mode == constants.INSTANCE_IMPORT:
6738
      nodes_keep = [self.op.src_node]
6739
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6740
                       if node != self.op.src_node]
6741
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6742
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6743
    else:
6744
      self.context.glm.release(locking.LEVEL_NODE)
6745
      del self.acquired_locks[locking.LEVEL_NODE]
6746

    
6747
    if self.op.wait_for_sync:
6748
      disk_abort = not _WaitForSync(self, iobj)
6749
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
6750
      # make sure the disks are not degraded (still sync-ing is ok)
6751
      time.sleep(15)
6752
      feedback_fn("* checking mirrors status")
6753
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6754
    else:
6755
      disk_abort = False
6756

    
6757
    if disk_abort:
6758
      _RemoveDisks(self, iobj)
6759
      self.cfg.RemoveInstance(iobj.name)
6760
      # Make sure the instance lock gets removed
6761
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6762
      raise errors.OpExecError("There are some degraded disks for"
6763
                               " this instance")
6764

    
6765
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6766
      if self.op.mode == constants.INSTANCE_CREATE:
6767
        if not self.op.no_install:
6768
          feedback_fn("* running the instance OS create scripts...")
6769
          # FIXME: pass debug option from opcode to backend
6770
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6771
                                                 self.op.debug_level)
6772
          result.Raise("Could not add os for instance %s"
6773
                       " on node %s" % (instance, pnode_name))
6774

    
6775
      elif self.op.mode == constants.INSTANCE_IMPORT:
6776
        feedback_fn("* running the instance OS import scripts...")
6777
        src_node = self.op.src_node
6778
        src_images = self.src_images
6779
        cluster_name = self.cfg.GetClusterName()
6780
        # FIXME: pass debug option from opcode to backend
6781
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6782
                                                         src_node, src_images,
6783
                                                         cluster_name,
6784
                                                         self.op.debug_level)
6785
        msg = import_result.fail_msg
6786
        if msg:
6787
          self.LogWarning("Error while importing the disk images for instance"
6788
                          " %s on node %s: %s" % (instance, pnode_name, msg))
6789
      else:
6790
        # also checked in the prereq part
6791
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6792
                                     % self.op.mode)
6793

    
6794
    if self.op.start:
6795
      iobj.admin_up = True
6796
      self.cfg.Update(iobj, feedback_fn)
6797
      logging.info("Starting instance %s on node %s", instance, pnode_name)
6798
      feedback_fn("* starting instance...")
6799
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6800
      result.Raise("Could not start instance")
6801

    
6802
    return list(iobj.all_nodes)
6803

    
6804

    
6805
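# Illustrative sketch (added for clarity; the values are made up): the
# opcode fed to LUCreateInstance describes disks and NICs as lists of plain
# dicts, which CheckPrereq above normalizes -- "size"/"mode" (or "adopt")
# per disk, and optional "ip", "mac", "mode", "link"/"bridge" per NIC.
_EXAMPLE_CREATE_DISKS = [
  {"size": 10240, "mode": constants.DISK_RDWR},
  ]
_EXAMPLE_CREATE_NICS = [
  {"mac": constants.VALUE_AUTO},  # mode/link fall back to cluster defaults
  ]

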
class LUConnectConsole(NoHooksLU):
6806
  """Connect to an instance's console.
6807

6808
  This is somewhat special in that it returns the command line that
6809
  you need to run on the master node in order to connect to the
6810
  console.
6811

6812
  """
6813
  _OP_REQP = ["instance_name"]
6814
  REQ_BGL = False
6815

    
6816
  def ExpandNames(self):
6817
    self._ExpandAndLockInstance()
6818

    
6819
  def CheckPrereq(self):
6820
    """Check prerequisites.
6821

6822
    This checks that the instance is in the cluster.
6823

6824
    """
6825
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6826
    assert self.instance is not None, \
6827
      "Cannot retrieve locked instance %s" % self.op.instance_name
6828
    _CheckNodeOnline(self, self.instance.primary_node)
6829

    
6830
  def Exec(self, feedback_fn):
6831
    """Connect to the console of an instance
6832

6833
    """
6834
    instance = self.instance
6835
    node = instance.primary_node
6836

    
6837
    node_insts = self.rpc.call_instance_list([node],
6838
                                             [instance.hypervisor])[node]
6839
    node_insts.Raise("Can't get node information from %s" % node)
6840

    
6841
    if instance.name not in node_insts.payload:
6842
      raise errors.OpExecError("Instance %s is not running." % instance.name)
6843

    
6844
    logging.debug("Connecting to console of %s on %s", instance.name, node)
6845

    
6846
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
6847
    cluster = self.cfg.GetClusterInfo()
6848
    # beparams and hvparams are passed separately, to avoid editing the
6849
    # instance and then saving the defaults in the instance itself.
6850
    hvparams = cluster.FillHV(instance)
6851
    beparams = cluster.FillBE(instance)
6852
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6853

    
6854
    # build ssh cmdline
6855
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6856

    
6857

    
6858
class LUReplaceDisks(LogicalUnit):
6859
  """Replace the disks of an instance.
6860

6861
  """
6862
  HPATH = "mirrors-replace"
6863
  HTYPE = constants.HTYPE_INSTANCE
6864
  _OP_REQP = ["instance_name", "mode", "disks"]
6865
  REQ_BGL = False
6866

    
6867
  def CheckArguments(self):
6868
    if not hasattr(self.op, "remote_node"):
6869
      self.op.remote_node = None
6870
    if not hasattr(self.op, "iallocator"):
6871
      self.op.iallocator = None
6872
    if not hasattr(self.op, "early_release"):
6873
      self.op.early_release = False
6874

    
6875
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6876
                                  self.op.iallocator)
6877

    
6878
  def ExpandNames(self):
6879
    self._ExpandAndLockInstance()
6880

    
6881
    if self.op.iallocator is not None:
6882
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6883

    
6884
    elif self.op.remote_node is not None:
6885
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6886
      self.op.remote_node = remote_node
6887

    
6888
      # Warning: do not remove the locking of the new secondary here
6889
      # unless DRBD8.AddChildren is changed to work in parallel;
6890
      # currently it doesn't since parallel invocations of
6891
      # FindUnusedMinor will conflict
6892
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6893
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6894

    
6895
    else:
6896
      self.needed_locks[locking.LEVEL_NODE] = []
6897
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6898

    
6899
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6900
                                   self.op.iallocator, self.op.remote_node,
6901
                                   self.op.disks, False, self.op.early_release)
6902

    
6903
    self.tasklets = [self.replacer]
6904

    
6905
  def DeclareLocks(self, level):
6906
    # If we're not already locking all nodes in the set we have to declare the
6907
    # instance's primary/secondary nodes.
6908
    if (level == locking.LEVEL_NODE and
6909
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6910
      self._LockInstancesNodes()
6911

    
6912
  def BuildHooksEnv(self):
6913
    """Build hooks env.
6914

6915
    This runs on the master, the primary and all the secondaries.
6916

6917
    """
6918
    instance = self.replacer.instance
6919
    env = {
6920
      "MODE": self.op.mode,
6921
      "NEW_SECONDARY": self.op.remote_node,
6922
      "OLD_SECONDARY": instance.secondary_nodes[0],
6923
      }
6924
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6925
    nl = [
6926
      self.cfg.GetMasterNode(),
6927
      instance.primary_node,
6928
      ]
6929
    if self.op.remote_node is not None:
6930
      nl.append(self.op.remote_node)
6931
    return env, nl, nl
6932

    
6933

    
6934
class LUEvacuateNode(LogicalUnit):
6935
  """Relocate the secondary instances from a node.
6936

6937
  """
6938
  HPATH = "node-evacuate"
6939
  HTYPE = constants.HTYPE_NODE
6940
  _OP_REQP = ["node_name"]
6941
  REQ_BGL = False
6942

    
6943
  def CheckArguments(self):
6944
    if not hasattr(self.op, "remote_node"):
6945
      self.op.remote_node = None
6946
    if not hasattr(self.op, "iallocator"):
6947
      self.op.iallocator = None
6948
    if not hasattr(self.op, "early_release"):
6949
      self.op.early_release = False
6950

    
6951
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6952
                                  self.op.remote_node,
6953
                                  self.op.iallocator)
6954

    
6955
  def ExpandNames(self):
6956
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6957

    
6958
    self.needed_locks = {}
6959

    
6960
    # Declare node locks
6961
    if self.op.iallocator is not None:
6962
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6963

    
6964
    elif self.op.remote_node is not None:
6965
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6966

    
6967
      # Warning: do not remove the locking of the new secondary here
6968
      # unless DRBD8.AddChildren is changed to work in parallel;
6969
      # currently it doesn't since parallel invocations of
6970
      # FindUnusedMinor will conflict
6971
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6972
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6973

    
6974
    else:
6975
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6976

    
6977
    # Create tasklets for replacing disks for all secondary instances on this
6978
    # node
6979
    names = []
6980
    tasklets = []
6981

    
6982
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6983
      logging.debug("Replacing disks for instance %s", inst.name)
6984
      names.append(inst.name)
6985

    
6986
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6987
                                self.op.iallocator, self.op.remote_node, [],
6988
                                True, self.op.early_release)
6989
      tasklets.append(replacer)
6990

    
6991
    self.tasklets = tasklets
6992
    self.instance_names = names
6993

    
6994
    # Declare instance locks
6995
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6996

    
6997
  def DeclareLocks(self, level):
6998
    # If we're not already locking all nodes in the set we have to declare the
6999
    # instance's primary/secondary nodes.
7000
    if (level == locking.LEVEL_NODE and
7001
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7002
      self._LockInstancesNodes()
7003

    
7004
  def BuildHooksEnv(self):
7005
    """Build hooks env.
7006

7007
    This runs on the master, the primary and all the secondaries.
7008

7009
    """
7010
    env = {
7011
      "NODE_NAME": self.op.node_name,
7012
      }
7013

    
7014
    nl = [self.cfg.GetMasterNode()]
7015

    
7016
    if self.op.remote_node is not None:
7017
      env["NEW_SECONDARY"] = self.op.remote_node
7018
      nl.append(self.op.remote_node)
7019

    
7020
    return (env, nl, nl)
7021

    
7022

    
7023
class TLReplaceDisks(Tasklet):
7024
  """Replaces disks for an instance.
7025

7026
  Note: Locking is not within the scope of this class.
7027

7028
  """
7029
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7030
               disks, delay_iallocator, early_release):
7031
    """Initializes this class.
7032

7033
    """
7034
    Tasklet.__init__(self, lu)
7035

    
7036
    # Parameters
7037
    self.instance_name = instance_name
7038
    self.mode = mode
7039
    self.iallocator_name = iallocator_name
7040
    self.remote_node = remote_node
7041
    self.disks = disks
7042
    self.delay_iallocator = delay_iallocator
7043
    self.early_release = early_release
7044

    
7045
    # Runtime data
7046
    self.instance = None
7047
    self.new_node = None
7048
    self.target_node = None
7049
    self.other_node = None
7050
    self.remote_node_info = None
7051
    self.node_secondary_ip = None
7052
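    # (These are filled in by CheckPrereq/_CheckPrereq2: target_node is the
    # node whose storage is being replaced, other_node the surviving peer,
    # and new_node the replacement secondary in REPLACE_DISK_CHG mode.)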

    
7053
  @staticmethod
7054
  def CheckArguments(mode, remote_node, iallocator):
7055
    """Helper function for users of this class.
7056

7057
    """
7058
    # check for valid parameter combination
7059
    if mode == constants.REPLACE_DISK_CHG:
7060
      if remote_node is None and iallocator is None:
7061
        raise errors.OpPrereqError("When changing the secondary either an"
7062
                                   " iallocator script must be used or the"
7063
                                   " new node given", errors.ECODE_INVAL)
7064

    
7065
      if remote_node is not None and iallocator is not None:
7066
        raise errors.OpPrereqError("Give either the iallocator or the new"
7067
                                   " secondary, not both", errors.ECODE_INVAL)
7068

    
7069
    elif remote_node is not None or iallocator is not None:
7070
      # Not replacing the secondary
7071
      raise errors.OpPrereqError("The iallocator and new node options can"
7072
                                 " only be used when changing the"
7073
                                 " secondary node", errors.ECODE_INVAL)
7074
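  # A minimal sketch (not part of this class) of the combinations the check
  # above accepts; plain strings stand in for the constants module and all
  # node/script names are made up:
  def _sketch_args_ok(mode, remote_node, iallocator):
    if mode == "replace_new_secondary":  # stands in for REPLACE_DISK_CHG
      return (remote_node is None) != (iallocator is None)
    return remote_node is None and iallocator is None
  assert _sketch_args_ok("replace_on_primary", None, None)
  assert _sketch_args_ok("replace_new_secondary", "node3.example.com", None)
  assert not _sketch_args_ok("replace_new_secondary", None, None)
  assert not _sketch_args_ok("replace_new_secondary", "node3", "hail")
  del _sketch_args_ok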

    
7075
  @staticmethod
7076
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7077
    """Compute a new secondary node using an IAllocator.
7078

7079
    """
7080
    ial = IAllocator(lu.cfg, lu.rpc,
7081
                     mode=constants.IALLOCATOR_MODE_RELOC,
7082
                     name=instance_name,
7083
                     relocate_from=relocate_from)
7084

    
7085
    ial.Run(iallocator_name)
7086

    
7087
    if not ial.success:
7088
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7089
                                 " %s" % (iallocator_name, ial.info),
7090
                                 errors.ECODE_NORES)
7091

    
7092
    if len(ial.result) != ial.required_nodes:
7093
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7094
                                 " of nodes (%s), required %s" %
7095
                                 (iallocator_name,
7096
                                  len(ial.result), ial.required_nodes),
7097
                                 errors.ECODE_FAULT)
7098

    
7099
    remote_node_name = ial.result[0]
7100

    
7101
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7102
               instance_name, remote_node_name)
7103

    
7104
    return remote_node_name
7105

    
7106
  def _FindFaultyDisks(self, node_name):
7107
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7108
                                    node_name, True)
7109

    
7110
  def CheckPrereq(self):
7111
    """Check prerequisites.
7112

7113
    This checks that the instance is in the cluster.
7114

7115
    """
7116
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7117
    assert instance is not None, \
7118
      "Cannot retrieve locked instance %s" % self.instance_name
7119

    
7120
    if instance.disk_template != constants.DT_DRBD8:
7121
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7122
                                 " instances", errors.ECODE_INVAL)
7123

    
7124
    if len(instance.secondary_nodes) != 1:
7125
      raise errors.OpPrereqError("The instance has a strange layout,"
7126
                                 " expected one secondary but found %d" %
7127
                                 len(instance.secondary_nodes),
7128
                                 errors.ECODE_FAULT)
7129

    
7130
    if not self.delay_iallocator:
7131
      self._CheckPrereq2()
7132

    
7133
  def _CheckPrereq2(self):
7134
    """Check prerequisites, second part.
7135

7136
    This function should always be part of CheckPrereq. It was separated and is
7137
    now called from Exec because during node evacuation iallocator was only
7138
    called with an unmodified cluster model, not taking planned changes into
7139
    account.
7140

7141
    """
7142
    instance = self.instance
7143
    secondary_node = instance.secondary_nodes[0]
7144

    
7145
    if self.iallocator_name is None:
7146
      remote_node = self.remote_node
7147
    else:
7148
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7149
                                       instance.name, instance.secondary_nodes)
7150

    
7151
    if remote_node is not None:
7152
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7153
      assert self.remote_node_info is not None, \
7154
        "Cannot retrieve locked node %s" % remote_node
7155
    else:
7156
      self.remote_node_info = None
7157

    
7158
    if remote_node == self.instance.primary_node:
7159
      raise errors.OpPrereqError("The specified node is the primary node of"
7160
                                 " the instance.", errors.ECODE_INVAL)
7161

    
7162
    if remote_node == secondary_node:
7163
      raise errors.OpPrereqError("The specified node is already the"
7164
                                 " secondary node of the instance.",
7165
                                 errors.ECODE_INVAL)
7166

    
7167
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7168
                                    constants.REPLACE_DISK_CHG):
7169
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7170
                                 errors.ECODE_INVAL)
7171

    
7172
    if self.mode == constants.REPLACE_DISK_AUTO:
7173
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7174
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7175

    
7176
      if faulty_primary and faulty_secondary:
7177
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7178
                                   " one node and can not be repaired"
7179
                                   " automatically" % self.instance_name,
7180
                                   errors.ECODE_STATE)
7181

    
7182
      if faulty_primary:
7183
        self.disks = faulty_primary
7184
        self.target_node = instance.primary_node
7185
        self.other_node = secondary_node
7186
        check_nodes = [self.target_node, self.other_node]
7187
      elif faulty_secondary:
7188
        self.disks = faulty_secondary
7189
        self.target_node = secondary_node
7190
        self.other_node = instance.primary_node
7191
        check_nodes = [self.target_node, self.other_node]
7192
      else:
7193
        self.disks = []
7194
        check_nodes = []
7195

    
7196
    else:
7197
      # Non-automatic modes
7198
      if self.mode == constants.REPLACE_DISK_PRI:
7199
        self.target_node = instance.primary_node
7200
        self.other_node = secondary_node
7201
        check_nodes = [self.target_node, self.other_node]
7202

    
7203
      elif self.mode == constants.REPLACE_DISK_SEC:
7204
        self.target_node = secondary_node
7205
        self.other_node = instance.primary_node
7206
        check_nodes = [self.target_node, self.other_node]
7207

    
7208
      elif self.mode == constants.REPLACE_DISK_CHG:
7209
        self.new_node = remote_node
7210
        self.other_node = instance.primary_node
7211
        self.target_node = secondary_node
7212
        check_nodes = [self.new_node, self.other_node]
7213

    
7214
        _CheckNodeNotDrained(self.lu, remote_node)
7215

    
7216
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7217
        assert old_node_info is not None
7218
        if old_node_info.offline and not self.early_release:
7219
          # doesn't make sense to delay the release
7220
          self.early_release = True
7221
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7222
                          " early-release mode", secondary_node)
7223

    
7224
      else:
7225
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7226
                                     self.mode)
7227

    
7228
      # If not specified all disks should be replaced
7229
      if not self.disks:
7230
        self.disks = range(len(self.instance.disks))
7231

    
7232
    for node in check_nodes:
7233
      _CheckNodeOnline(self.lu, node)
7234

    
7235
    # Check whether disks are valid
7236
    for disk_idx in self.disks:
7237
      instance.FindDisk(disk_idx)
7238

    
7239
    # Get secondary node IP addresses
7240
    node_2nd_ip = {}
7241

    
7242
    for node_name in [self.target_node, self.other_node, self.new_node]:
7243
      if node_name is not None:
7244
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7245

    
7246
    self.node_secondary_ip = node_2nd_ip
7247

    
7248
  def Exec(self, feedback_fn):
7249
    """Execute disk replacement.
7250

7251
    This dispatches the disk replacement to the appropriate handler.
7252

7253
    """
7254
    if self.delay_iallocator:
7255
      self._CheckPrereq2()
7256

    
7257
    if not self.disks:
7258
      feedback_fn("No disks need replacement")
7259
      return
7260

    
7261
    feedback_fn("Replacing disk(s) %s for %s" %
7262
                (utils.CommaJoin(self.disks), self.instance.name))
7263

    
7264
    activate_disks = (not self.instance.admin_up)
7265

    
7266
    # Activate the instance disks if we're replacing them on a down instance
7267
    if activate_disks:
7268
      _StartInstanceDisks(self.lu, self.instance, True)
7269

    
7270
    try:
7271
      # Should we replace the secondary node?
7272
      if self.new_node is not None:
7273
        fn = self._ExecDrbd8Secondary
7274
      else:
7275
        fn = self._ExecDrbd8DiskOnly
7276

    
7277
      return fn(feedback_fn)
7278

    
7279
    finally:
7280
      # Deactivate the instance disks if we're replacing them on a
7281
      # down instance
7282
      if activate_disks:
7283
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7284

    
7285
  def _CheckVolumeGroup(self, nodes):
7286
    self.lu.LogInfo("Checking volume groups")
7287

    
7288
    vgname = self.cfg.GetVGName()
7289

    
7290
    # Make sure volume group exists on all involved nodes
7291
    results = self.rpc.call_vg_list(nodes)
7292
    if not results:
7293
      raise errors.OpExecError("Can't list volume groups on the nodes")
7294

    
7295
    for node in nodes:
7296
      res = results[node]
7297
      res.Raise("Error checking node %s" % node)
7298
      if vgname not in res.payload:
7299
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7300
                                 (vgname, node))
7301

    
7302
  def _CheckDisksExistence(self, nodes):
7303
    # Check disk existence
7304
    for idx, dev in enumerate(self.instance.disks):
7305
      if idx not in self.disks:
7306
        continue
7307

    
7308
      for node in nodes:
7309
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7310
        self.cfg.SetDiskID(dev, node)
7311

    
7312
        result = self.rpc.call_blockdev_find(node, dev)
7313

    
7314
        msg = result.fail_msg
7315
        if msg or not result.payload:
7316
          if not msg:
7317
            msg = "disk not found"
7318
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7319
                                   (idx, node, msg))
7320

    
7321
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7322
    for idx, dev in enumerate(self.instance.disks):
7323
      if idx not in self.disks:
7324
        continue
7325

    
7326
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7327
                      (idx, node_name))
7328

    
7329
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7330
                                   ldisk=ldisk):
7331
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7332
                                 " replace disks for instance %s" %
7333
                                 (node_name, self.instance.name))
7334

    
7335
  def _CreateNewStorage(self, node_name):
7336
    vgname = self.cfg.GetVGName()
7337
    iv_names = {}
7338

    
7339
    for idx, dev in enumerate(self.instance.disks):
7340
      if idx not in self.disks:
7341
        continue
7342

    
7343
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7344

    
7345
      self.cfg.SetDiskID(dev, node_name)
7346

    
7347
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7348
      names = _GenerateUniqueNames(self.lu, lv_names)
7349

    
7350
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7351
                             logical_id=(vgname, names[0]))
7352
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7353
                             logical_id=(vgname, names[1]))
7354

    
7355
      new_lvs = [lv_data, lv_meta]
7356
      old_lvs = dev.children
7357
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7358

    
7359
      # we pass force_create=True to force the LVM creation
7360
      for new_lv in new_lvs:
7361
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7362
                        _GetInstanceInfoText(self.instance), False)
7363

    
7364
    return iv_names
7365
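  # Illustration of the LV name suffixes built in _CreateNewStorage;
  # _GenerateUniqueNames turns these into cluster-unique volume names:
  _lv_suffixes = [".disk%d_%s" % (0, "data"), ".disk%d_%s" % (0, "meta")]
  assert _lv_suffixes == [".disk0_data", ".disk0_meta"]
  # The returned iv_names dict maps each DRBD device name (e.g. "disk/0")
  # to a (drbd_dev, old_lvs, new_lvs) tuple.
  del _lv_suffixes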

    
7366
  def _CheckDevices(self, node_name, iv_names):
7367
    for name, (dev, _, _) in iv_names.iteritems():
7368
      self.cfg.SetDiskID(dev, node_name)
7369

    
7370
      result = self.rpc.call_blockdev_find(node_name, dev)
7371

    
7372
      msg = result.fail_msg
7373
      if msg or not result.payload:
7374
        if not msg:
7375
          msg = "disk not found"
7376
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7377
                                 (name, msg))
7378

    
7379
      if result.payload.is_degraded:
7380
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7381

    
7382
  def _RemoveOldStorage(self, node_name, iv_names):
7383
    for name, (_, old_lvs, _) in iv_names.iteritems():
7384
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7385

    
7386
      for lv in old_lvs:
7387
        self.cfg.SetDiskID(lv, node_name)
7388

    
7389
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7390
        if msg:
7391
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7392
                             hint="remove unused LVs manually")
7393

    
7394
  def _ReleaseNodeLock(self, node_name):
7395
    """Releases the lock for a given node."""
7396
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7397

    
7398
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7399
    """Replace a disk on the primary or secondary for DRBD 8.
7400

7401
    The algorithm for replace is quite complicated:
7402

7403
      1. for each disk to be replaced:
7404

7405
        1. create new LVs on the target node with unique names
7406
        1. detach old LVs from the drbd device
7407
        1. rename old LVs to name_replaced.<time_t>
7408
        1. rename new LVs to old LVs
7409
        1. attach the new LVs (with the old names now) to the drbd device
7410

7411
      1. wait for sync across all devices
7412

7413
      1. for each modified disk:
7414

7415
        1. remove old LVs (which have the name name_replaced.<time_t>)
7416

7417
    Failures are not very well handled.
7418

7419
    """
7420
    steps_total = 6
7421

    
7422
    # Step: check device activation
7423
    self.lu.LogStep(1, steps_total, "Check device existence")
7424
    self._CheckDisksExistence([self.other_node, self.target_node])
7425
    self._CheckVolumeGroup([self.target_node, self.other_node])
7426

    
7427
    # Step: check other node consistency
7428
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7429
    self._CheckDisksConsistency(self.other_node,
7430
                                self.other_node == self.instance.primary_node,
7431
                                False)
7432

    
7433
    # Step: create new storage
7434
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7435
    iv_names = self._CreateNewStorage(self.target_node)
7436

    
7437
    # Step: for each lv, detach+rename*2+attach
7438
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7439
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7440
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7441

    
7442
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7443
                                                     old_lvs)
7444
      result.Raise("Can't detach drbd from local storage on node"
7445
                   " %s for device %s" % (self.target_node, dev.iv_name))
7446
      #dev.children = []
7447
      #cfg.Update(instance)
7448

    
7449
      # ok, we created the new LVs, so now we know we have the needed
7450
      # storage; as such, we proceed on the target node to rename
7451
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7452
      # using the assumption that logical_id == physical_id (which in
7453
      # turn is the unique_id on that node)
7454

    
7455
      # FIXME(iustin): use a better name for the replaced LVs
7456
      temp_suffix = int(time.time())
7457
      ren_fn = lambda d, suff: (d.physical_id[0],
7458
                                d.physical_id[1] + "_replaced-%s" % suff)
7459
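      # For example (made-up values), a data LV whose physical_id is
      # ("xenvg", ".disk0_data") would be renamed by ren_fn with suffix
      # 1299000000 to ("xenvg", ".disk0_data_replaced-1299000000").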

    
7460
      # Build the rename list based on what LVs exist on the node
7461
      rename_old_to_new = []
7462
      for to_ren in old_lvs:
7463
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7464
        if not result.fail_msg and result.payload:
7465
          # device exists
7466
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7467

    
7468
      self.lu.LogInfo("Renaming the old LVs on the target node")
7469
      result = self.rpc.call_blockdev_rename(self.target_node,
7470
                                             rename_old_to_new)
7471
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7472

    
7473
      # Now we rename the new LVs to the old LVs
7474
      self.lu.LogInfo("Renaming the new LVs on the target node")
7475
      rename_new_to_old = [(new, old.physical_id)
7476
                           for old, new in zip(old_lvs, new_lvs)]
7477
      result = self.rpc.call_blockdev_rename(self.target_node,
7478
                                             rename_new_to_old)
7479
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7480

    
7481
      for old, new in zip(old_lvs, new_lvs):
7482
        new.logical_id = old.logical_id
7483
        self.cfg.SetDiskID(new, self.target_node)
7484

    
7485
      for disk in old_lvs:
7486
        disk.logical_id = ren_fn(disk, temp_suffix)
7487
        self.cfg.SetDiskID(disk, self.target_node)
7488

    
7489
      # Now that the new lvs have the old name, we can add them to the device
7490
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7491
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7492
                                                  new_lvs)
7493
      msg = result.fail_msg
7494
      if msg:
7495
        for new_lv in new_lvs:
7496
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7497
                                               new_lv).fail_msg
7498
          if msg2:
7499
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7500
                               hint=("cleanup manually the unused logical"
7501
                                     "volumes"))
7502
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7503

    
7504
      dev.children = new_lvs
7505

    
7506
      self.cfg.Update(self.instance, feedback_fn)
7507

    
7508
    cstep = 5
7509
    if self.early_release:
7510
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7511
      cstep += 1
7512
      self._RemoveOldStorage(self.target_node, iv_names)
7513
      # WARNING: we release both node locks here, do not do other RPCs
7514
      # than WaitForSync to the primary node
7515
      self._ReleaseNodeLock([self.target_node, self.other_node])
7516

    
7517
    # Wait for sync
7518
    # This can fail as the old devices are degraded and _WaitForSync
7519
    # does a combined result over all disks, so we don't check its return value
7520
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7521
    cstep += 1
7522
    _WaitForSync(self.lu, self.instance)
7523

    
7524
    # Check all devices manually
7525
    self._CheckDevices(self.instance.primary_node, iv_names)
7526

    
7527
    # Step: remove old storage
7528
    if not self.early_release:
7529
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7530
      cstep += 1
7531
      self._RemoveOldStorage(self.target_node, iv_names)
7532

    
7533
  def _ExecDrbd8Secondary(self, feedback_fn):
7534
    """Replace the secondary node for DRBD 8.
7535

7536
    The algorithm for replace is quite complicated:
7537
      - for all disks of the instance:
7538
        - create new LVs on the new node with the same names
7539
        - shutdown the drbd device on the old secondary
7540
        - disconnect the drbd network on the primary
7541
        - create the drbd device on the new secondary
7542
        - network attach the drbd on the primary, using an artifice:
7543
          the drbd code for Attach() will connect to the network if it
7544
          finds a device which is connected to the good local disks but
7545
          not network enabled
7546
      - wait for sync across all devices
7547
      - remove all disks from the old secondary
7548

7549
    Failures are not very well handled.
7550

7551
    """
7552
    steps_total = 6
7553

    
7554
    # Step: check device activation
7555
    self.lu.LogStep(1, steps_total, "Check device existence")
7556
    self._CheckDisksExistence([self.instance.primary_node])
7557
    self._CheckVolumeGroup([self.instance.primary_node])
7558

    
7559
    # Step: check other node consistency
7560
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7561
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7562

    
7563
    # Step: create new storage
7564
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7565
    for idx, dev in enumerate(self.instance.disks):
7566
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7567
                      (self.new_node, idx))
7568
      # we pass force_create=True to force LVM creation
7569
      for new_lv in dev.children:
7570
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7571
                        _GetInstanceInfoText(self.instance), False)
7572

    
7573
    # Step 4: drbd minors and drbd setup changes
7574
    # after this, we must manually remove the drbd minors on both the
7575
    # error and the success paths
7576
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7577
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7578
                                         for dev in self.instance.disks],
7579
                                        self.instance.name)
7580
    logging.debug("Allocated minors %r", minors)
7581

    
7582
    iv_names = {}
7583
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7584
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7585
                      (self.new_node, idx))
7586
      # create new devices on new_node; note that we create two IDs:
7587
      # one without port, so the drbd will be activated without
7588
      # networking information on the new node at this stage, and one
7589
      # with network, for the latter activation in step 4
7590
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7591
      if self.instance.primary_node == o_node1:
7592
        p_minor = o_minor1
7593
      else:
7594
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7595
        p_minor = o_minor2
7596

    
7597
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7598
                      p_minor, new_minor, o_secret)
7599
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7600
                    p_minor, new_minor, o_secret)
7601
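      # Illustration with made-up values: if the old logical_id was
      #   ("node1", "node2", 11000, 0, 3, "secret")
      # and the new node was assigned minor 5, then
      #   new_alone_id == ("node1", "node3", None,  0, 5, "secret")
      #   new_net_id   == ("node1", "node3", 11000, 0, 5, "secret")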

    
7602
      iv_names[idx] = (dev, dev.children, new_net_id)
7603
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7604
                    new_net_id)
7605
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7606
                              logical_id=new_alone_id,
7607
                              children=dev.children,
7608
                              size=dev.size)
7609
      try:
7610
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7611
                              _GetInstanceInfoText(self.instance), False)
7612
      except errors.GenericError:
7613
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7614
        raise
7615

    
7616
    # We have new devices, shutdown the drbd on the old secondary
7617
    for idx, dev in enumerate(self.instance.disks):
7618
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7619
      self.cfg.SetDiskID(dev, self.target_node)
7620
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7621
      if msg:
7622
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7623
                           "node: %s" % (idx, msg),
7624
                           hint=("Please cleanup this device manually as"
7625
                                 " soon as possible"))
7626

    
7627
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7628
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7629
                                               self.node_secondary_ip,
7630
                                               self.instance.disks)\
7631
                                              [self.instance.primary_node]
7632

    
7633
    msg = result.fail_msg
7634
    if msg:
7635
      # detaches didn't succeed (unlikely)
7636
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7637
      raise errors.OpExecError("Can't detach the disks from the network on"
7638
                               " old node: %s" % (msg,))
7639

    
7640
    # if we managed to detach at least one, we update all the disks of
7641
    # the instance to point to the new secondary
7642
    self.lu.LogInfo("Updating instance configuration")
7643
    for dev, _, new_logical_id in iv_names.itervalues():
7644
      dev.logical_id = new_logical_id
7645
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7646

    
7647
    self.cfg.Update(self.instance, feedback_fn)
7648

    
7649
    # and now perform the drbd attach
7650
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7651
                    " (standalone => connected)")
7652
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7653
                                            self.new_node],
7654
                                           self.node_secondary_ip,
7655
                                           self.instance.disks,
7656
                                           self.instance.name,
7657
                                           False)
7658
    for to_node, to_result in result.items():
7659
      msg = to_result.fail_msg
7660
      if msg:
7661
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7662
                           to_node, msg,
7663
                           hint=("please do a gnt-instance info to see the"
7664
                                 " status of disks"))
7665
    cstep = 5
7666
    if self.early_release:
7667
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7668
      cstep += 1
7669
      self._RemoveOldStorage(self.target_node, iv_names)
7670
      # WARNING: we release all node locks here, do not do other RPCs
7671
      # than WaitForSync to the primary node
7672
      self._ReleaseNodeLock([self.instance.primary_node,
7673
                             self.target_node,
7674
                             self.new_node])
7675

    
7676
    # Wait for sync
7677
    # This can fail as the old devices are degraded and _WaitForSync
7678
    # does a combined result over all disks, so we don't check its return value
7679
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7680
    cstep += 1
7681
    _WaitForSync(self.lu, self.instance)
7682

    
7683
    # Check all devices manually
7684
    self._CheckDevices(self.instance.primary_node, iv_names)
7685

    
7686
    # Step: remove old storage
7687
    if not self.early_release:
7688
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7689
      self._RemoveOldStorage(self.target_node, iv_names)
7690

    
7691

    
7692
class LURepairNodeStorage(NoHooksLU):
7693
  """Repairs the volume group on a node.
7694

7695
  """
7696
  _OP_REQP = ["node_name"]
7697
  REQ_BGL = False
7698

    
7699
  def CheckArguments(self):
7700
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7701

    
7702
    _CheckStorageType(self.op.storage_type)
7703

    
7704
  def ExpandNames(self):
7705
    self.needed_locks = {
7706
      locking.LEVEL_NODE: [self.op.node_name],
7707
      }
7708

    
7709
  def _CheckFaultyDisks(self, instance, node_name):
7710
    """Ensure faulty disks abort the opcode or at least warn."""
7711
    try:
7712
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7713
                                  node_name, True):
7714
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7715
                                   " node '%s'" % (instance.name, node_name),
7716
                                   errors.ECODE_STATE)
7717
    except errors.OpPrereqError, err:
7718
      if self.op.ignore_consistency:
7719
        self.proc.LogWarning(str(err.args[0]))
7720
      else:
7721
        raise
7722

    
7723
  def CheckPrereq(self):
7724
    """Check prerequisites.
7725

7726
    """
7727
    storage_type = self.op.storage_type
7728

    
7729
    if (constants.SO_FIX_CONSISTENCY not in
7730
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7731
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7732
                                 " repaired" % storage_type,
7733
                                 errors.ECODE_INVAL)
7734

    
7735
    # Check whether any instance on this node has faulty disks
7736
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7737
      if not inst.admin_up:
7738
        continue
7739
      check_nodes = set(inst.all_nodes)
7740
      check_nodes.discard(self.op.node_name)
7741
      for inst_node_name in check_nodes:
7742
        self._CheckFaultyDisks(inst, inst_node_name)
7743

    
7744
  def Exec(self, feedback_fn):
7745
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7746
                (self.op.name, self.op.node_name))
7747

    
7748
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7749
    result = self.rpc.call_storage_execute(self.op.node_name,
7750
                                           self.op.storage_type, st_args,
7751
                                           self.op.name,
7752
                                           constants.SO_FIX_CONSISTENCY)
7753
    result.Raise("Failed to repair storage unit '%s' on %s" %
7754
                 (self.op.name, self.op.node_name))
7755
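  # Sketch of the opcode fields this LU reads, shown as a plain dict with
  # made-up values (the real self.op is an opcode object):
  _example_op = {
    "node_name": "node1.example.com",
    "storage_type": "lvm-vg",
    "name": "xenvg",
    "ignore_consistency": False,
    }
  assert "node_name" in _example_op
  del _example_op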

    
7756

    
7757
class LUNodeEvacuationStrategy(NoHooksLU):
7758
  """Computes the node evacuation strategy.
7759

7760
  """
7761
  _OP_REQP = ["nodes"]
7762
  REQ_BGL = False
7763

    
7764
  def CheckArguments(self):
7765
    if not hasattr(self.op, "remote_node"):
7766
      self.op.remote_node = None
7767
    if not hasattr(self.op, "iallocator"):
7768
      self.op.iallocator = None
7769
    if self.op.remote_node is not None and self.op.iallocator is not None:
7770
      raise errors.OpPrereqError("Give either the iallocator or the new"
7771
                                 " secondary, not both", errors.ECODE_INVAL)
7772

    
7773
  def ExpandNames(self):
7774
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7775
    self.needed_locks = locks = {}
7776
    if self.op.remote_node is None:
7777
      locks[locking.LEVEL_NODE] = locking.ALL_SET
7778
    else:
7779
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7780
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7781

    
7782
  def CheckPrereq(self):
7783
    pass
7784

    
7785
  def Exec(self, feedback_fn):
7786
    if self.op.remote_node is not None:
7787
      instances = []
7788
      for node in self.op.nodes:
7789
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7790
      result = []
7791
      for i in instances:
7792
        if i.primary_node == self.op.remote_node:
7793
          raise errors.OpPrereqError("Node %s is the primary node of"
7794
                                     " instance %s, cannot use it as"
7795
                                     " secondary" %
7796
                                     (self.op.remote_node, i.name),
7797
                                     errors.ECODE_INVAL)
7798
        result.append([i.name, self.op.remote_node])
7799
    else:
7800
      ial = IAllocator(self.cfg, self.rpc,
7801
                       mode=constants.IALLOCATOR_MODE_MEVAC,
7802
                       evac_nodes=self.op.nodes)
7803
      ial.Run(self.op.iallocator, validate=True)
7804
      if not ial.success:
7805
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7806
                                 errors.ECODE_NORES)
7807
      result = ial.result
7808
    return result
7809
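  # Sketch of the result shape with made-up names: when a target node is
  # given, each entry is an [instance_name, new_node] pair; in iallocator
  # mode the IAllocator's MEVAC result is returned unchanged.
  _example_result = [
    ["instance1.example.com", "node3.example.com"],
    ["instance2.example.com", "node3.example.com"],
    ]
  assert len(_example_result[0]) == 2
  del _example_result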

    
7810

    
7811
class LUGrowDisk(LogicalUnit):
7812
  """Grow a disk of an instance.
7813

7814
  """
7815
  HPATH = "disk-grow"
7816
  HTYPE = constants.HTYPE_INSTANCE
7817
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7818
  REQ_BGL = False
7819

    
7820
  def ExpandNames(self):
7821
    self._ExpandAndLockInstance()
7822
    self.needed_locks[locking.LEVEL_NODE] = []
7823
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7824

    
7825
  def DeclareLocks(self, level):
7826
    if level == locking.LEVEL_NODE:
7827
      self._LockInstancesNodes()
7828

    
7829
  def BuildHooksEnv(self):
7830
    """Build hooks env.
7831

7832
    This runs on the master, the primary and all the secondaries.
7833

7834
    """
7835
    env = {
7836
      "DISK": self.op.disk,
7837
      "AMOUNT": self.op.amount,
7838
      }
7839
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7840
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7841
    return env, nl, nl
7842

    
7843
  def CheckPrereq(self):
7844
    """Check prerequisites.
7845

7846
    This checks that the instance is in the cluster.
7847

7848
    """
7849
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7850
    assert instance is not None, \
7851
      "Cannot retrieve locked instance %s" % self.op.instance_name
7852
    nodenames = list(instance.all_nodes)
7853
    for node in nodenames:
7854
      _CheckNodeOnline(self, node)
7855

    
7856

    
7857
    self.instance = instance
7858

    
7859
    if instance.disk_template not in constants.DTS_GROWABLE:
7860
      raise errors.OpPrereqError("Instance's disk layout does not support"
7861
                                 " growing.", errors.ECODE_INVAL)
7862

    
7863
    self.disk = instance.FindDisk(self.op.disk)
7864

    
7865
    if instance.disk_template != constants.DT_FILE:
7866
      # TODO: check the free disk space for file, when that feature will be
7867
      # supported
7868
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7869

    
7870
  def Exec(self, feedback_fn):
7871
    """Execute disk grow.
7872

7873
    """
7874
    instance = self.instance
7875
    disk = self.disk
7876
    for node in instance.all_nodes:
7877
      self.cfg.SetDiskID(disk, node)
7878
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7879
      result.Raise("Grow request failed to node %s" % node)
7880

    
7881
      # TODO: Rewrite code to work properly
7882
      # DRBD goes into sync mode for a short amount of time after executing the
7883
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7884
      # calling "resize" in sync mode fails. Sleeping for a short amount of
7885
      # time is a work-around.
7886
      time.sleep(5)
7887

    
7888
    disk.RecordGrow(self.op.amount)
7889
    self.cfg.Update(instance, feedback_fn)
7890
    if self.op.wait_for_sync:
7891
      disk_abort = not _WaitForSync(self, instance)
7892
      if disk_abort:
7893
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7894
                             " status.\nPlease check the instance.")
7895
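  # Sketch of the opcode this LU expects, shown as a plain dict with made-up
  # values (the real self.op is an opcode object; "amount" is the requested
  # size increase for the given disk index):
  _example_op = {
    "instance_name": "instance1.example.com",
    "disk": 0,
    "amount": 2048,
    "wait_for_sync": True,
    }
  assert set(_example_op) == set(_OP_REQP)
  del _example_op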

    
7896

    
7897
class LUQueryInstanceData(NoHooksLU):
7898
  """Query runtime instance data.
7899

7900
  """
7901
  _OP_REQP = ["instances", "static"]
7902
  REQ_BGL = False
7903

    
7904
  def ExpandNames(self):
7905
    self.needed_locks = {}
7906
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7907

    
7908
    if not isinstance(self.op.instances, list):
7909
      raise errors.OpPrereqError("Invalid argument type 'instances'",
7910
                                 errors.ECODE_INVAL)
7911

    
7912
    if self.op.instances:
7913
      self.wanted_names = []
7914
      for name in self.op.instances:
7915
        full_name = _ExpandInstanceName(self.cfg, name)
7916
        self.wanted_names.append(full_name)
7917
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7918
    else:
7919
      self.wanted_names = None
7920
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7921

    
7922
    self.needed_locks[locking.LEVEL_NODE] = []
7923
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7924

    
7925
  def DeclareLocks(self, level):
7926
    if level == locking.LEVEL_NODE:
7927
      self._LockInstancesNodes()
7928

    
7929
  def CheckPrereq(self):
7930
    """Check prerequisites.
7931

7932
    This only checks the optional instance list against the existing names.
7933

7934
    """
7935
    if self.wanted_names is None:
7936
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7937

    
7938
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7939
                             in self.wanted_names]
7940
    return
7941

    
7942
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
7943
    """Returns the status of a block device
7944

7945
    """
7946
    if self.op.static or not node:
7947
      return None
7948

    
7949
    self.cfg.SetDiskID(dev, node)
7950

    
7951
    result = self.rpc.call_blockdev_find(node, dev)
7952
    if result.offline:
7953
      return None
7954

    
7955
    result.Raise("Can't compute disk status for %s" % instance_name)
7956

    
7957
    status = result.payload
7958
    if status is None:
7959
      return None
7960

    
7961
    return (status.dev_path, status.major, status.minor,
7962
            status.sync_percent, status.estimated_time,
7963
            status.is_degraded, status.ldisk_status)
7964

    
7965
  def _ComputeDiskStatus(self, instance, snode, dev):
7966
    """Compute block device status.
7967

7968
    """
7969
    if dev.dev_type in constants.LDS_DRBD:
7970
      # we change the snode then (otherwise we use the one passed in)
7971
      if dev.logical_id[0] == instance.primary_node:
7972
        snode = dev.logical_id[1]
7973
      else:
7974
        snode = dev.logical_id[0]
7975

    
7976
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7977
                                              instance.name, dev)
7978
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7979

    
7980
    if dev.children:
7981
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
7982
                      for child in dev.children]
7983
    else:
7984
      dev_children = []
7985

    
7986
    data = {
7987
      "iv_name": dev.iv_name,
7988
      "dev_type": dev.dev_type,
7989
      "logical_id": dev.logical_id,
7990
      "physical_id": dev.physical_id,
7991
      "pstatus": dev_pstatus,
7992
      "sstatus": dev_sstatus,
7993
      "children": dev_children,
7994
      "mode": dev.mode,
7995
      "size": dev.size,
7996
      }
7997

    
7998
    return data
7999
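  # Sketch (made-up values) of the per-disk structure built above; "pstatus"
  # and "sstatus" hold the 7-tuples from _ComputeBlockdevStatus, or None
  # when the data is unavailable or static output was requested:
  _example_disk = {
    "iv_name": "disk/0",
    "dev_type": "drbd8",
    "logical_id": ("node1", "node2", 11000, 0, 0, "secret"),
    "physical_id": None,
    "pstatus": ("/dev/drbd0", 147, 0, 100.0, 0, False, None),
    "sstatus": None,
    "children": [],    # the backing LVs would be described here, recursively
    "mode": "rw",
    "size": 10240,
    }
  assert len(_example_disk["pstatus"]) == 7
  del _example_disk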

    
8000
  def Exec(self, feedback_fn):
8001
    """Gather and return data"""
8002
    result = {}
8003

    
8004
    cluster = self.cfg.GetClusterInfo()
8005

    
8006
    for instance in self.wanted_instances:
8007
      if not self.op.static:
8008
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8009
                                                  instance.name,
8010
                                                  instance.hypervisor)
8011
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8012
        remote_info = remote_info.payload
8013
        if remote_info and "state" in remote_info:
8014
          remote_state = "up"
8015
        else:
8016
          remote_state = "down"
8017
      else:
8018
        remote_state = None
8019
      if instance.admin_up:
8020
        config_state = "up"
8021
      else:
8022
        config_state = "down"
8023

    
8024
      disks = [self._ComputeDiskStatus(instance, None, device)
8025
               for device in instance.disks]
8026

    
8027
      idict = {
8028
        "name": instance.name,
8029
        "config_state": config_state,
8030
        "run_state": remote_state,
8031
        "pnode": instance.primary_node,
8032
        "snodes": instance.secondary_nodes,
8033
        "os": instance.os,
8034
        # this happens to be the same format used for hooks
8035
        "nics": _NICListToTuple(self, instance.nics),
8036
        "disks": disks,
8037
        "hypervisor": instance.hypervisor,
8038
        "network_port": instance.network_port,
8039
        "hv_instance": instance.hvparams,
8040
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8041
        "be_instance": instance.beparams,
8042
        "be_actual": cluster.FillBE(instance),
8043
        "serial_no": instance.serial_no,
8044
        "mtime": instance.mtime,
8045
        "ctime": instance.ctime,
8046
        "uuid": instance.uuid,
8047
        }
8048

    
8049
      result[instance.name] = idict
8050

    
8051
    return result
8052

    
8053

    
8054
class LUSetInstanceParams(LogicalUnit):
8055
  """Modifies an instances's parameters.
8056

8057
  """
8058
  HPATH = "instance-modify"
8059
  HTYPE = constants.HTYPE_INSTANCE
8060
  _OP_REQP = ["instance_name"]
8061
  REQ_BGL = False
8062

    
8063
  def CheckArguments(self):
8064
    if not hasattr(self.op, 'nics'):
8065
      self.op.nics = []
8066
    if not hasattr(self.op, 'disks'):
8067
      self.op.disks = []
8068
    if not hasattr(self.op, 'beparams'):
8069
      self.op.beparams = {}
8070
    if not hasattr(self.op, 'hvparams'):
8071
      self.op.hvparams = {}
8072
    if not hasattr(self.op, "disk_template"):
8073
      self.op.disk_template = None
8074
    if not hasattr(self.op, "remote_node"):
8075
      self.op.remote_node = None
8076
    if not hasattr(self.op, "os_name"):
8077
      self.op.os_name = None
8078
    if not hasattr(self.op, "force_variant"):
8079
      self.op.force_variant = False
8080
    self.op.force = getattr(self.op, "force", False)
8081
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8082
            self.op.hvparams or self.op.beparams or self.op.os_name):
8083
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8084

    
8085
    if self.op.hvparams:
8086
      _CheckGlobalHvParams(self.op.hvparams)
8087

    
8088
    # Disk validation
8089
    disk_addremove = 0
8090
    for disk_op, disk_dict in self.op.disks:
8091
      if disk_op == constants.DDM_REMOVE:
8092
        disk_addremove += 1
8093
        continue
8094
      elif disk_op == constants.DDM_ADD:
8095
        disk_addremove += 1
8096
      else:
8097
        if not isinstance(disk_op, int):
8098
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8099
        if not isinstance(disk_dict, dict):
8100
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8101
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8102

    
8103
      if disk_op == constants.DDM_ADD:
8104
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8105
        if mode not in constants.DISK_ACCESS_SET:
8106
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8107
                                     errors.ECODE_INVAL)
8108
        size = disk_dict.get('size', None)
8109
        if size is None:
8110
          raise errors.OpPrereqError("Required disk parameter size missing",
8111
                                     errors.ECODE_INVAL)
8112
        try:
8113
          size = int(size)
8114
        except (TypeError, ValueError), err:
8115
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8116
                                     str(err), errors.ECODE_INVAL)
8117
        disk_dict['size'] = size
8118
      else:
8119
        # modification of disk
8120
        if 'size' in disk_dict:
8121
          raise errors.OpPrereqError("Disk size change not possible, use"
8122
                                     " grow-disk", errors.ECODE_INVAL)
8123

    
8124
    if disk_addremove > 1:
8125
      raise errors.OpPrereqError("Only one disk add or remove operation"
8126
                                 " supported at a time", errors.ECODE_INVAL)
8127

    
8128
    if self.op.disks and self.op.disk_template is not None:
8129
      raise errors.OpPrereqError("Disk template conversion and other disk"
8130
                                 " changes not supported at the same time",
8131
                                 errors.ECODE_INVAL)
8132

    
8133
    if self.op.disk_template:
8134
      _CheckDiskTemplate(self.op.disk_template)
8135
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8136
          self.op.remote_node is None):
8137
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8138
                                   " one requires specifying a secondary node",
8139
                                   errors.ECODE_INVAL)
8140

    
8141
    # NIC validation
8142
    nic_addremove = 0
8143
    for nic_op, nic_dict in self.op.nics:
8144
      if nic_op == constants.DDM_REMOVE:
8145
        nic_addremove += 1
8146
        continue
8147
      elif nic_op == constants.DDM_ADD:
8148
        nic_addremove += 1
8149
      else:
8150
        if not isinstance(nic_op, int):
8151
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8152
        if not isinstance(nic_dict, dict):
8153
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8154
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8155

    
8156
      # nic_dict should be a dict
8157
      nic_ip = nic_dict.get('ip', None)
8158
      if nic_ip is not None:
8159
        if nic_ip.lower() == constants.VALUE_NONE:
8160
          nic_dict['ip'] = None
8161
        else:
8162
          if not utils.IsValidIP(nic_ip):
8163
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8164
                                       errors.ECODE_INVAL)
8165

    
8166
      nic_bridge = nic_dict.get('bridge', None)
8167
      nic_link = nic_dict.get('link', None)
8168
      if nic_bridge and nic_link:
8169
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8170
                                   " at the same time", errors.ECODE_INVAL)
8171
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8172
        nic_dict['bridge'] = None
8173
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8174
        nic_dict['link'] = None
8175

    
8176
      if nic_op == constants.DDM_ADD:
8177
        nic_mac = nic_dict.get('mac', None)
8178
        if nic_mac is None:
8179
          nic_dict['mac'] = constants.VALUE_AUTO
8180

    
8181
      if 'mac' in nic_dict:
8182
        nic_mac = nic_dict['mac']
8183
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8184
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8185

    
8186
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8187
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8188
                                     " modifying an existing nic",
8189
                                     errors.ECODE_INVAL)
8190

    
8191
    if nic_addremove > 1:
8192
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8193
                                 " supported at a time", errors.ECODE_INVAL)
8194
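  # Sketch (made-up values) of the self.op.disks / self.op.nics format
  # validated above: lists of (operation, parameters) pairs, where the
  # operation is either the index of an existing device or one of the
  # DDM_ADD / DDM_REMOVE constants.
  _example_disks = [(constants.DDM_ADD, {"size": 1024, "mode": "rw"})]
  _example_nics = [(0, {"link": "br100"})]
  assert isinstance(_example_disks[0][1], dict)
  del _example_disks, _example_nics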

    
8195
  def ExpandNames(self):
8196
    self._ExpandAndLockInstance()
8197
    self.needed_locks[locking.LEVEL_NODE] = []
8198
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8199

    
8200
  def DeclareLocks(self, level):
8201
    if level == locking.LEVEL_NODE:
8202
      self._LockInstancesNodes()
8203
      if self.op.disk_template and self.op.remote_node:
8204
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8205
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8206

    
8207
  def BuildHooksEnv(self):
8208
    """Build hooks env.
8209

8210
    This runs on the master, primary and secondaries.
8211

8212
    """
8213
    args = dict()
8214
    if constants.BE_MEMORY in self.be_new:
8215
      args['memory'] = self.be_new[constants.BE_MEMORY]
8216
    if constants.BE_VCPUS in self.be_new:
8217
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8218
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8219
    # information at all.
8220
    if self.op.nics:
8221
      args['nics'] = []
8222
      nic_override = dict(self.op.nics)
8223
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8224
      for idx, nic in enumerate(self.instance.nics):
8225
        if idx in nic_override:
8226
          this_nic_override = nic_override[idx]
8227
        else:
8228
          this_nic_override = {}
8229
        if 'ip' in this_nic_override:
8230
          ip = this_nic_override['ip']
8231
        else:
8232
          ip = nic.ip
8233
        if 'mac' in this_nic_override:
8234
          mac = this_nic_override['mac']
8235
        else:
8236
          mac = nic.mac
8237
        if idx in self.nic_pnew:
8238
          nicparams = self.nic_pnew[idx]
8239
        else:
8240
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8241
        mode = nicparams[constants.NIC_MODE]
8242
        link = nicparams[constants.NIC_LINK]
8243
        args['nics'].append((ip, mac, mode, link))
8244
      if constants.DDM_ADD in nic_override:
8245
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8246
        mac = nic_override[constants.DDM_ADD]['mac']
8247
        nicparams = self.nic_pnew[constants.DDM_ADD]
8248
        mode = nicparams[constants.NIC_MODE]
8249
        link = nicparams[constants.NIC_LINK]
8250
        args['nics'].append((ip, mac, mode, link))
8251
      elif constants.DDM_REMOVE in nic_override:
8252
        del args['nics'][-1]
8253

    
8254
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8255
    if self.op.disk_template:
8256
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8257
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8258
    return env, nl, nl
8259

    
8260
  @staticmethod
8261
  def _GetUpdatedParams(old_params, update_dict,
8262
                        default_values, parameter_types):
8263
    """Return the new params dict for the given params.
8264

8265
    @type old_params: dict
8266
    @param old_params: old parameters
8267
    @type update_dict: dict
8268
    @param update_dict: dict containing new parameter values,
8269
                        or constants.VALUE_DEFAULT to reset the
8270
                        parameter to its default value
8271
    @type default_values: dict
8272
    @param default_values: default values for the filled parameters
8273
    @type parameter_types: dict
8274
    @param parameter_types: dict mapping target dict keys to types
8275
                            in constants.ENFORCEABLE_TYPES
8276
    @rtype: (dict, dict)
8277
    @return: (new_parameters, filled_parameters)
8278

8279
    """
8280
    params_copy = copy.deepcopy(old_params)
8281
    for key, val in update_dict.iteritems():
8282
      if val == constants.VALUE_DEFAULT:
8283
        try:
8284
          del params_copy[key]
8285
        except KeyError:
8286
          pass
8287
      else:
8288
        params_copy[key] = val
8289
    utils.ForceDictType(params_copy, parameter_types)
8290
    params_filled = objects.FillDict(default_values, params_copy)
8291
    return (params_copy, params_filled)
8292

    
8293
  def CheckPrereq(self):
8294
    """Check prerequisites.
8295

8296
    This checks the new parameters against the instance's current state.
8297

8298
    """
8299
    self.force = self.op.force
8300

    
8301
    # checking the new params on the primary/secondary nodes
8302

    
8303
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8304
    cluster = self.cluster = self.cfg.GetClusterInfo()
8305
    assert self.instance is not None, \
8306
      "Cannot retrieve locked instance %s" % self.op.instance_name
8307
    pnode = instance.primary_node
8308
    nodelist = list(instance.all_nodes)
8309

    
8310
    if self.op.disk_template:
8311
      if instance.disk_template == self.op.disk_template:
8312
        raise errors.OpPrereqError("Instance already has disk template %s" %
8313
                                   instance.disk_template, errors.ECODE_INVAL)
8314

    
8315
      if (instance.disk_template,
8316
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8317
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8318
                                   " %s to %s" % (instance.disk_template,
8319
                                                  self.op.disk_template),
8320
                                   errors.ECODE_INVAL)
8321
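      # Converting to a mirrored template additionally requires an online,
      # undrained remote node with enough free disk, and a stopped instance.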
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8322
        _CheckNodeOnline(self, self.op.remote_node)
8323
        _CheckNodeNotDrained(self, self.op.remote_node)
8324
        disks = [{"size": d.size} for d in instance.disks]
8325
        required = _ComputeDiskSize(self.op.disk_template, disks)
8326
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8327
        _CheckInstanceDown(self, instance, "cannot change disk template")
8328

    
8329
    # hvparams processing
8330
    if self.op.hvparams:
8331
      i_hvdict, hv_new = self._GetUpdatedParams(
8332
                             instance.hvparams, self.op.hvparams,
8333
                             cluster.hvparams[instance.hypervisor],
8334
                             constants.HVS_PARAMETER_TYPES)
8335
      # local check
8336
      hypervisor.GetHypervisor(
8337
        instance.hypervisor).CheckParameterSyntax(hv_new)
8338
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8339
      self.hv_new = hv_new # the new actual values
8340
      self.hv_inst = i_hvdict # the new dict (without defaults)
8341
    else:
8342
      self.hv_new = self.hv_inst = {}
8343

    
8344
    # beparams processing
8345
    if self.op.beparams:
8346
      i_bedict, be_new = self._GetUpdatedParams(
8347
                             instance.beparams, self.op.beparams,
8348
                             cluster.beparams[constants.PP_DEFAULT],
8349
                             constants.BES_PARAMETER_TYPES)
8350
      self.be_new = be_new # the new actual values
8351
      self.be_inst = i_bedict # the new dict (without defaults)
8352
    else:
8353
      self.be_new = self.be_inst = {}
8354

    
8355
    self.warn = []
8356

    
8357
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8358
      mem_check_list = [pnode]
8359
      if be_new[constants.BE_AUTO_BALANCE]:
8360
        # either we changed auto_balance to yes or it was from before
8361
        mem_check_list.extend(instance.secondary_nodes)
8362
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8363
                                                  instance.hypervisor)
8364
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8365
                                         instance.hypervisor)
8366
      pninfo = nodeinfo[pnode]
8367
      msg = pninfo.fail_msg
8368
      if msg:
8369
        # Assume the primary node is unreachable and go ahead
8370
        self.warn.append("Can't get info from primary node %s: %s" %
8371
                         (pnode,  msg))
8372
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8373
        self.warn.append("Node data from primary node %s doesn't contain"
8374
                         " free memory information" % pnode)
8375
      elif instance_info.fail_msg:
8376
        self.warn.append("Can't get instance runtime information: %s" %
8377
                        instance_info.fail_msg)
8378
      else:
8379
        if instance_info.payload:
8380
          current_mem = int(instance_info.payload['memory'])
8381
        else:
8382
          # Assume instance not running
8383
          # (there is a slight race condition here, but it's not very probable,
8384
          # and we have no other way to check)
8385
          current_mem = 0
8386
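        # Memory the instance currently uses is effectively already available
        # to it, so only the increase must fit into the node's free memory.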
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8387
                    pninfo.payload['memory_free'])
8388
        if miss_mem > 0:
8389
          raise errors.OpPrereqError("This change will prevent the instance"
8390
                                     " from starting, due to %d MB of memory"
8391
                                     " missing on its primary node" % miss_mem,
8392
                                     errors.ECODE_NORES)
8393

    
8394
      if be_new[constants.BE_AUTO_BALANCE]:
8395
        for node, nres in nodeinfo.items():
8396
          if node not in instance.secondary_nodes:
8397
            continue
8398
          msg = nres.fail_msg
8399
          if msg:
8400
            self.warn.append("Can't get info from secondary node %s: %s" %
8401
                             (node, msg))
8402
          elif not isinstance(nres.payload.get('memory_free', None), int):
8403
            self.warn.append("Secondary node %s didn't return free"
8404
                             " memory information" % node)
8405
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8406
            self.warn.append("Not enough memory to failover instance to"
8407
                             " secondary node %s" % node)
8408

    
8409
    # NIC processing
8410
    self.nic_pnew = {}
8411
    self.nic_pinst = {}
8412
    for nic_op, nic_dict in self.op.nics:
8413
      if nic_op == constants.DDM_REMOVE:
8414
        if not instance.nics:
8415
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8416
                                     errors.ECODE_INVAL)
8417
        continue
8418
      if nic_op != constants.DDM_ADD:
8419
        # an existing nic
8420
        if not instance.nics:
8421
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8422
                                     " no NICs" % nic_op,
8423
                                     errors.ECODE_INVAL)
8424
        if nic_op < 0 or nic_op >= len(instance.nics):
8425
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8426
                                     " are 0 to %d" %
8427
                                     (nic_op, len(instance.nics) - 1),
8428
                                     errors.ECODE_INVAL)
8429
        old_nic_params = instance.nics[nic_op].nicparams
8430
        old_nic_ip = instance.nics[nic_op].ip
8431
      else:
8432
        old_nic_params = {}
8433
        old_nic_ip = None
8434

    
8435
      update_params_dict = dict([(key, nic_dict[key])
8436
                                 for key in constants.NICS_PARAMETERS
8437
                                 if key in nic_dict])
8438

    
8439
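      # Backwards compatibility: a legacy 'bridge' argument is mapped onto
      # the 'link' nicparam.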
      if 'bridge' in nic_dict:
8440
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8441

    
8442
      new_nic_params, new_filled_nic_params = \
8443
          self._GetUpdatedParams(old_nic_params, update_params_dict,
8444
                                 cluster.nicparams[constants.PP_DEFAULT],
8445
                                 constants.NICS_PARAMETER_TYPES)
8446
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8447
      self.nic_pinst[nic_op] = new_nic_params
8448
      self.nic_pnew[nic_op] = new_filled_nic_params
8449
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8450

    
8451
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8452
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8453
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8454
        if msg:
8455
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8456
          if self.force:
8457
            self.warn.append(msg)
8458
          else:
8459
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8460
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8461
        if 'ip' in nic_dict:
8462
          nic_ip = nic_dict['ip']
8463
        else:
8464
          nic_ip = old_nic_ip
8465
        if nic_ip is None:
8466
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8467
                                     ' on a routed nic', errors.ECODE_INVAL)
8468
      if 'mac' in nic_dict:
8469
        nic_mac = nic_dict['mac']
8470
        if nic_mac is None:
8471
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8472
                                     errors.ECODE_INVAL)
8473
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8474
          # otherwise generate the mac
8475
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8476
        else:
8477
          # or validate/reserve the current one
8478
          try:
8479
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8480
          except errors.ReservationError:
8481
            raise errors.OpPrereqError("MAC address %s already in use"
8482
                                       " in cluster" % nic_mac,
8483
                                       errors.ECODE_NOTUNIQUE)
8484

    
8485
    # DISK processing
8486
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8487
      raise errors.OpPrereqError("Disk operations not supported for"
8488
                                 " diskless instances",
8489
                                 errors.ECODE_INVAL)
8490
    for disk_op, _ in self.op.disks:
8491
      if disk_op == constants.DDM_REMOVE:
8492
        if len(instance.disks) == 1:
8493
          raise errors.OpPrereqError("Cannot remove the last disk of"
8494
                                     " an instance", errors.ECODE_INVAL)
8495
        _CheckInstanceDown(self, instance, "cannot remove disks")
8496

    
8497
      if (disk_op == constants.DDM_ADD and
8498
          len(instance.disks) >= constants.MAX_DISKS):
8499
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8500
                                   " add more" % constants.MAX_DISKS,
8501
                                   errors.ECODE_STATE)
8502
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8503
        # an existing disk
8504
        if disk_op < 0 or disk_op >= len(instance.disks):
8505
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8506
                                     " are 0 to %d" %
8507
                                     (disk_op, len(instance.disks) - 1),
8508
                                     errors.ECODE_INVAL)
8509

    
8510
    # OS change
8511
    if self.op.os_name and not self.op.force:
8512
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8513
                      self.op.force_variant)
8514

    
8515
    return
8516

    
8517
  def _ConvertPlainToDrbd(self, feedback_fn):
8518
    """Converts an instance from plain to drbd.
8519

8520
    """
8521
    feedback_fn("Converting template to drbd")
8522
    instance = self.instance
8523
    pnode = instance.primary_node
8524
    snode = self.op.remote_node
8525

    
8526
    # create a fake disk info for _GenerateDiskTemplate
8527
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8528
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8529
                                      instance.name, pnode, [snode],
8530
                                      disk_info, None, None, 0)
8531
    info = _GetInstanceInfoText(instance)
8532
    feedback_fn("Creating aditional volumes...")
8533
    # first, create the missing data and meta devices
8534
    for disk in new_disks:
8535
      # unfortunately this is... not too nice
8536
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8537
                            info, True)
8538
      for child in disk.children:
8539
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8540
    # at this stage, all new LVs have been created, we can rename the
8541
    # old ones
8542
    feedback_fn("Renaming original volumes...")
8543
    rename_list = [(o, n.children[0].logical_id)
8544
                   for (o, n) in zip(instance.disks, new_disks)]
8545
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8546
    result.Raise("Failed to rename original LVs")
8547

    
8548
    feedback_fn("Initializing DRBD devices...")
8549
    # all child devices are in place, we can now create the DRBD devices
8550
    for disk in new_disks:
8551
      for node in [pnode, snode]:
8552
        f_create = node == pnode
8553
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8554

    
8555
    # at this point, the instance has been modified
8556
    instance.disk_template = constants.DT_DRBD8
8557
    instance.disks = new_disks
8558
    self.cfg.Update(instance, feedback_fn)
8559

    
8560
    # disks are created, waiting for sync
8561
    disk_abort = not _WaitForSync(self, instance)
8562
    if disk_abort:
8563
      raise errors.OpExecError("There are some degraded disks for"
8564
                               " this instance, please cleanup manually")
8565

    
8566
  def _ConvertDrbdToPlain(self, feedback_fn):
8567
    """Converts an instance from drbd to plain.
8568

8569
    """
8570
    instance = self.instance
8571
    assert len(instance.secondary_nodes) == 1
8572
    pnode = instance.primary_node
8573
    snode = instance.secondary_nodes[0]
8574
    feedback_fn("Converting template to plain")
8575

    
8576
    old_disks = instance.disks
8577
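    # For DRBD8 each disk has two children: the data LV (index 0) and the
    # metadata LV (index 1); the plain template keeps only the data LV.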
    new_disks = [d.children[0] for d in old_disks]
8578

    
8579
    # copy over size and mode
8580
    for parent, child in zip(old_disks, new_disks):
8581
      child.size = parent.size
8582
      child.mode = parent.mode
8583

    
8584
    # update instance structure
8585
    instance.disks = new_disks
8586
    instance.disk_template = constants.DT_PLAIN
8587
    self.cfg.Update(instance, feedback_fn)
8588

    
8589
    feedback_fn("Removing volumes on the secondary node...")
8590
    for disk in old_disks:
8591
      self.cfg.SetDiskID(disk, snode)
8592
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8593
      if msg:
8594
        self.LogWarning("Could not remove block device %s on node %s,"
8595
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8596

    
8597
    feedback_fn("Removing unneeded volumes on the primary node...")
8598
    for idx, disk in enumerate(old_disks):
8599
      meta = disk.children[1]
8600
      self.cfg.SetDiskID(meta, pnode)
8601
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8602
      if msg:
8603
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8604
                        " continuing anyway: %s", idx, pnode, msg)
8605

    
8606

    
8607
  def Exec(self, feedback_fn):
8608
    """Modifies an instance.
8609

8610
    All parameters take effect only at the next restart of the instance.
8611

8612
    """
8613
    # Process here the warnings from CheckPrereq, as we don't have a
8614
    # feedback_fn there.
8615
    for warn in self.warn:
8616
      feedback_fn("WARNING: %s" % warn)
8617

    
8618
    result = []
8619
    instance = self.instance
8620
    # disk changes
8621
    for disk_op, disk_dict in self.op.disks:
8622
      if disk_op == constants.DDM_REMOVE:
8623
        # remove the last disk
8624
        device = instance.disks.pop()
8625
        device_idx = len(instance.disks)
8626
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8627
          self.cfg.SetDiskID(disk, node)
8628
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8629
          if msg:
8630
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8631
                            " continuing anyway", device_idx, node, msg)
8632
        result.append(("disk/%d" % device_idx, "remove"))
8633
      elif disk_op == constants.DDM_ADD:
8634
        # add a new disk
8635
        if instance.disk_template == constants.DT_FILE:
8636
          file_driver, file_path = instance.disks[0].logical_id
8637
          file_path = os.path.dirname(file_path)
8638
        else:
8639
          file_driver = file_path = None
8640
        disk_idx_base = len(instance.disks)
8641
        new_disk = _GenerateDiskTemplate(self,
8642
                                         instance.disk_template,
8643
                                         instance.name, instance.primary_node,
8644
                                         instance.secondary_nodes,
8645
                                         [disk_dict],
8646
                                         file_path,
8647
                                         file_driver,
8648
                                         disk_idx_base)[0]
8649
        instance.disks.append(new_disk)
8650
        info = _GetInstanceInfoText(instance)
8651

    
8652
        logging.info("Creating volume %s for instance %s",
8653
                     new_disk.iv_name, instance.name)
8654
        # Note: this needs to be kept in sync with _CreateDisks
8655
        #HARDCODE
8656
        for node in instance.all_nodes:
8657
          f_create = node == instance.primary_node
8658
          try:
8659
            _CreateBlockDev(self, node, instance, new_disk,
8660
                            f_create, info, f_create)
8661
          except errors.OpExecError, err:
8662
            self.LogWarning("Failed to create volume %s (%s) on"
8663
                            " node %s: %s",
8664
                            new_disk.iv_name, new_disk, node, err)
8665
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8666
                       (new_disk.size, new_disk.mode)))
8667
      else:
8668
        # change a given disk
8669
        instance.disks[disk_op].mode = disk_dict['mode']
8670
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8671

    
8672
    if self.op.disk_template:
8673
      r_shut = _ShutdownInstanceDisks(self, instance)
8674
      if not r_shut:
8675
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8676
                                 " proceed with disk template conversion")
8677
      mode = (instance.disk_template, self.op.disk_template)
8678
      try:
8679
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8680
      except:
8681
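        # Release the DRBD minors reserved for the conversion before
        # re-raising, so they are not leaked in the configuration.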
        self.cfg.ReleaseDRBDMinors(instance.name)
8682
        raise
8683
      result.append(("disk_template", self.op.disk_template))
8684

    
8685
    # NIC changes
8686
    for nic_op, nic_dict in self.op.nics:
8687
      if nic_op == constants.DDM_REMOVE:
8688
        # remove the last nic
8689
        del instance.nics[-1]
8690
        result.append(("nic.%d" % len(instance.nics), "remove"))
8691
      elif nic_op == constants.DDM_ADD:
8692
        # mac and bridge should be set by now
8693
        mac = nic_dict['mac']
8694
        ip = nic_dict.get('ip', None)
8695
        nicparams = self.nic_pinst[constants.DDM_ADD]
8696
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8697
        instance.nics.append(new_nic)
8698
        result.append(("nic.%d" % (len(instance.nics) - 1),
8699
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8700
                       (new_nic.mac, new_nic.ip,
8701
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8702
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8703
                       )))
8704
      else:
8705
        for key in 'mac', 'ip':
8706
          if key in nic_dict:
8707
            setattr(instance.nics[nic_op], key, nic_dict[key])
8708
        if nic_op in self.nic_pinst:
8709
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8710
        for key, val in nic_dict.iteritems():
8711
          result.append(("nic.%s/%d" % (key, nic_op), val))
8712

    
8713
    # hvparams changes
8714
    if self.op.hvparams:
8715
      instance.hvparams = self.hv_inst
8716
      for key, val in self.op.hvparams.iteritems():
8717
        result.append(("hv/%s" % key, val))
8718

    
8719
    # beparams changes
8720
    if self.op.beparams:
8721
      instance.beparams = self.be_inst
8722
      for key, val in self.op.beparams.iteritems():
8723
        result.append(("be/%s" % key, val))
8724

    
8725
    # OS change
8726
    if self.op.os_name:
8727
      instance.os = self.op.os_name
8728

    
8729
    self.cfg.Update(instance, feedback_fn)
8730

    
8731
    return result
8732

    
8733
  _DISK_CONVERSIONS = {
8734
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8735
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8736
    }
8737

    
8738
class LUQueryExports(NoHooksLU):
8739
  """Query the exports list
8740

8741
  """
8742
  _OP_REQP = ['nodes']
8743
  REQ_BGL = False
8744

    
8745
  def ExpandNames(self):
8746
    self.needed_locks = {}
8747
    self.share_locks[locking.LEVEL_NODE] = 1
8748
    if not self.op.nodes:
8749
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8750
    else:
8751
      self.needed_locks[locking.LEVEL_NODE] = \
8752
        _GetWantedNodes(self, self.op.nodes)
8753

    
8754
  def CheckPrereq(self):
8755
    """Check prerequisites.
8756

8757
    """
8758
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8759

    
8760
  def Exec(self, feedback_fn):
8761
    """Compute the list of all the exported system images.
8762

8763
    @rtype: dict
8764
    @return: a dictionary with the structure node->(export-list)
8765
        where export-list is a list of the instances exported on
8766
        that node.
8767

8768
    """
8769
    rpcresult = self.rpc.call_export_list(self.nodes)
8770
    result = {}
8771
    for node in rpcresult:
8772
      if rpcresult[node].fail_msg:
8773
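        # Nodes that could not be queried are reported as False instead of
        # an export list.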
        result[node] = False
8774
      else:
8775
        result[node] = rpcresult[node].payload
8776

    
8777
    return result
8778

    
8779

    
8780
class LUExportInstance(LogicalUnit):
8781
  """Export an instance to an image in the cluster.
8782

8783
  """
8784
  HPATH = "instance-export"
8785
  HTYPE = constants.HTYPE_INSTANCE
8786
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8787
  REQ_BGL = False
8788

    
8789
  def CheckArguments(self):
8790
    """Check the arguments.
8791

8792
    """
8793
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8794
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
8795

    
8796
  def ExpandNames(self):
8797
    self._ExpandAndLockInstance()
8798
    # FIXME: lock only instance primary and destination node
8799
    #
8800
    # Sad but true, for now we have to lock all nodes, as we don't know where
8801
    # the previous export might be, and in this LU we search for it and
8802
    # remove it from its current node. In the future we could fix this by:
8803
    #  - making a tasklet to search (share-lock all), then create the new one,
8804
    #    then one to remove, after
8805
    #  - removing the removal operation altogether
8806
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8807

    
8808
  def DeclareLocks(self, level):
8809
    """Last minute lock declaration."""
8810
    # All nodes are locked anyway, so nothing to do here.
8811

    
8812
  def BuildHooksEnv(self):
8813
    """Build hooks env.
8814

8815
    This will run on the master, primary node and target node.
8816

8817
    """
8818
    env = {
8819
      "EXPORT_NODE": self.op.target_node,
8820
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8821
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8822
      }
8823
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8824
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8825
          self.op.target_node]
8826
    return env, nl, nl
8827

    
8828
  def CheckPrereq(self):
8829
    """Check prerequisites.
8830

8831
    This checks that the instance and node names are valid.
8832

8833
    """
8834
    instance_name = self.op.instance_name
8835
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8836
    assert self.instance is not None, \
8837
          "Cannot retrieve locked instance %s" % self.op.instance_name
8838
    _CheckNodeOnline(self, self.instance.primary_node)
8839

    
8840
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8841
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8842
    assert self.dst_node is not None
8843

    
8844
    _CheckNodeOnline(self, self.dst_node.name)
8845
    _CheckNodeNotDrained(self, self.dst_node.name)
8846

    
8847
    # instance disk type verification
8848
    for disk in self.instance.disks:
8849
      if disk.dev_type == constants.LD_FILE:
8850
        raise errors.OpPrereqError("Export not supported for instances with"
8851
                                   " file-based disks", errors.ECODE_INVAL)
8852

    
8853
  def Exec(self, feedback_fn):
8854
    """Export an instance to an image in the cluster.
8855

8856
    """
8857
    instance = self.instance
8858
    dst_node = self.dst_node
8859
    src_node = instance.primary_node
8860

    
8861
    if self.op.shutdown:
8862
      # shutdown the instance, but not the disks
8863
      feedback_fn("Shutting down instance %s" % instance.name)
8864
      result = self.rpc.call_instance_shutdown(src_node, instance,
8865
                                               self.shutdown_timeout)
8866
      result.Raise("Could not shutdown instance %s on"
8867
                   " node %s" % (instance.name, src_node))
8868

    
8869
    vgname = self.cfg.GetVGName()
8870

    
8871
    snap_disks = []
8872

    
8873
    # set the disks ID correctly since call_instance_start needs the
8874
    # correct drbd minor to create the symlinks
8875
    for disk in instance.disks:
8876
      self.cfg.SetDiskID(disk, src_node)
8877

    
8878
    activate_disks = (not instance.admin_up)
8879

    
8880
    if activate_disks:
8881
      # Activate the instance disks if we're exporting a stopped instance
8882
      feedback_fn("Activating disks for %s" % instance.name)
8883
      _StartInstanceDisks(self, instance, None)
8884

    
8885
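    # The export runs in three steps: snapshot every disk on the source node,
    # copy the snapshots to the destination node, then finalize the export
    # there; the outer finally block deactivates the disks again if needed.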
    try:
8886
      # per-disk results
8887
      dresults = []
8888
      try:
8889
        for idx, disk in enumerate(instance.disks):
8890
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
8891
                      (idx, src_node))
8892

    
8893
          # result.payload will be a snapshot of an lvm leaf of the one we
8894
          # passed
8895
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
8896
          msg = result.fail_msg
8897
          if msg:
8898
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8899
                            idx, src_node, msg)
8900
            snap_disks.append(False)
8901
          else:
8902
            disk_id = (vgname, result.payload)
8903
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8904
                                   logical_id=disk_id, physical_id=disk_id,
8905
                                   iv_name=disk.iv_name)
8906
            snap_disks.append(new_dev)
8907

    
8908
      finally:
8909
        if self.op.shutdown and instance.admin_up:
8910
          feedback_fn("Starting instance %s" % instance.name)
8911
          result = self.rpc.call_instance_start(src_node, instance, None, None)
8912
          msg = result.fail_msg
8913
          if msg:
8914
            _ShutdownInstanceDisks(self, instance)
8915
            raise errors.OpExecError("Could not start instance: %s" % msg)
8916

    
8917
      # TODO: check for size
8918

    
8919
      cluster_name = self.cfg.GetClusterName()
8920
      for idx, dev in enumerate(snap_disks):
8921
        feedback_fn("Exporting snapshot %s from %s to %s" %
8922
                    (idx, src_node, dst_node.name))
8923
        if dev:
8924
          # FIXME: pass debug from opcode to backend
8925
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8926
                                                 instance, cluster_name,
8927
                                                 idx, self.op.debug_level)
8928
          msg = result.fail_msg
8929
          if msg:
8930
            self.LogWarning("Could not export disk/%s from node %s to"
8931
                            " node %s: %s", idx, src_node, dst_node.name, msg)
8932
            dresults.append(False)
8933
          else:
8934
            dresults.append(True)
8935
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8936
          if msg:
8937
            self.LogWarning("Could not remove snapshot for disk/%d from node"
8938
                            " %s: %s", idx, src_node, msg)
8939
        else:
8940
          dresults.append(False)
8941

    
8942
      feedback_fn("Finalizing export on %s" % dst_node.name)
8943
      result = self.rpc.call_finalize_export(dst_node.name, instance,
8944
                                             snap_disks)
8945
      fin_resu = True
8946
      msg = result.fail_msg
8947
      if msg:
8948
        self.LogWarning("Could not finalize export for instance %s"
8949
                        " on node %s: %s", instance.name, dst_node.name, msg)
8950
        fin_resu = False
8951

    
8952
    finally:
8953
      if activate_disks:
8954
        feedback_fn("Deactivating disks for %s" % instance.name)
8955
        _ShutdownInstanceDisks(self, instance)
8956

    
8957
    nodelist = self.cfg.GetNodeList()
8958
    nodelist.remove(dst_node.name)
8959

    
8960
    # on one-node clusters nodelist will be empty after the removal
8961
    # if we proceeded, the backup would be removed because OpQueryExports
8962
    # substitutes an empty list with the full cluster node list.
8963
    iname = instance.name
8964
    if nodelist:
8965
      feedback_fn("Removing old exports for instance %s" % iname)
8966
      exportlist = self.rpc.call_export_list(nodelist)
8967
      for node in exportlist:
8968
        if exportlist[node].fail_msg:
8969
          continue
8970
        if iname in exportlist[node].payload:
8971
          msg = self.rpc.call_export_remove(node, iname).fail_msg
8972
          if msg:
8973
            self.LogWarning("Could not remove older export for instance %s"
8974
                            " on node %s: %s", iname, node, msg)
8975
    return fin_resu, dresults
8976

    
8977

    
8978
class LURemoveExport(NoHooksLU):
8979
  """Remove exports related to the named instance.
8980

8981
  """
8982
  _OP_REQP = ["instance_name"]
8983
  REQ_BGL = False
8984

    
8985
  def ExpandNames(self):
8986
    self.needed_locks = {}
8987
    # We need all nodes to be locked in order for RemoveExport to work, but we
8988
    # don't need to lock the instance itself, as nothing will happen to it (and
8989
    # we can remove exports also for a removed instance)
8990
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8991

    
8992
  def CheckPrereq(self):
8993
    """Check prerequisites.
8994
    """
8995
    pass
8996

    
8997
  def Exec(self, feedback_fn):
8998
    """Remove any export.
8999

9000
    """
9001
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9002
    # If the instance was not found we'll try with the name that was passed in.
9003
    # This will only work if it was an FQDN, though.
9004
    fqdn_warn = False
9005
    if not instance_name:
9006
      fqdn_warn = True
9007
      instance_name = self.op.instance_name
9008

    
9009
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9010
    exportlist = self.rpc.call_export_list(locked_nodes)
9011
    found = False
9012
    for node in exportlist:
9013
      msg = exportlist[node].fail_msg
9014
      if msg:
9015
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9016
        continue
9017
      if instance_name in exportlist[node].payload:
9018
        found = True
9019
        result = self.rpc.call_export_remove(node, instance_name)
9020
        msg = result.fail_msg
9021
        if msg:
9022
          logging.error("Could not remove export for instance %s"
9023
                        " on node %s: %s", instance_name, node, msg)
9024

    
9025
    if fqdn_warn and not found:
9026
      feedback_fn("Export not found. If trying to remove an export belonging"
9027
                  " to a deleted instance please use its Fully Qualified"
9028
                  " Domain Name.")
9029

    
9030

    
9031
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9032
  """Generic tags LU.
9033

9034
  This is an abstract class which is the parent of all the other tags LUs.
9035

9036
  """
9037

    
9038
  def ExpandNames(self):
9039
    self.needed_locks = {}
9040
    if self.op.kind == constants.TAG_NODE:
9041
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9042
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9043
    elif self.op.kind == constants.TAG_INSTANCE:
9044
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9045
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9046

    
9047
  def CheckPrereq(self):
9048
    """Check prerequisites.
9049

9050
    """
9051
    if self.op.kind == constants.TAG_CLUSTER:
9052
      self.target = self.cfg.GetClusterInfo()
9053
    elif self.op.kind == constants.TAG_NODE:
9054
      self.target = self.cfg.GetNodeInfo(self.op.name)
9055
    elif self.op.kind == constants.TAG_INSTANCE:
9056
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9057
    else:
9058
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9059
                                 str(self.op.kind), errors.ECODE_INVAL)
9060

    
9061

    
9062
class LUGetTags(TagsLU):
9063
  """Returns the tags of a given object.
9064

9065
  """
9066
  _OP_REQP = ["kind", "name"]
9067
  REQ_BGL = False
9068

    
9069
  def Exec(self, feedback_fn):
9070
    """Returns the tag list.
9071

9072
    """
9073
    return list(self.target.GetTags())
9074

    
9075

    
9076
class LUSearchTags(NoHooksLU):
9077
  """Searches the tags for a given pattern.
9078

9079
  """
9080
  _OP_REQP = ["pattern"]
9081
  REQ_BGL = False
9082

    
9083
  def ExpandNames(self):
9084
    self.needed_locks = {}
9085

    
9086
  def CheckPrereq(self):
9087
    """Check prerequisites.
9088

9089
    This checks the pattern passed for validity by compiling it.
9090

9091
    """
9092
    try:
9093
      self.re = re.compile(self.op.pattern)
9094
    except re.error, err:
9095
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9096
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9097

    
9098
  def Exec(self, feedback_fn):
9099
    """Returns the tag list.
9100

9101
    """
9102
    cfg = self.cfg
9103
    tgts = [("/cluster", cfg.GetClusterInfo())]
9104
    ilist = cfg.GetAllInstancesInfo().values()
9105
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9106
    nlist = cfg.GetAllNodesInfo().values()
9107
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9108
    results = []
9109
    for path, target in tgts:
9110
      for tag in target.GetTags():
9111
        if self.re.search(tag):
9112
          results.append((path, tag))
9113
    return results
9114

    
9115

    
9116
class LUAddTags(TagsLU):
9117
  """Sets a tag on a given object.
9118

9119
  """
9120
  _OP_REQP = ["kind", "name", "tags"]
9121
  REQ_BGL = False
9122

    
9123
  def CheckPrereq(self):
9124
    """Check prerequisites.
9125

9126
    This checks the type and length of the tag name and value.
9127

9128
    """
9129
    TagsLU.CheckPrereq(self)
9130
    for tag in self.op.tags:
9131
      objects.TaggableObject.ValidateTag(tag)
9132

    
9133
  def Exec(self, feedback_fn):
9134
    """Sets the tag.
9135

9136
    """
9137
    try:
9138
      for tag in self.op.tags:
9139
        self.target.AddTag(tag)
9140
    except errors.TagError, err:
9141
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9142
    self.cfg.Update(self.target, feedback_fn)
9143

    
9144

    
9145
class LUDelTags(TagsLU):
9146
  """Delete a list of tags from a given object.
9147

9148
  """
9149
  _OP_REQP = ["kind", "name", "tags"]
9150
  REQ_BGL = False
9151

    
9152
  def CheckPrereq(self):
9153
    """Check prerequisites.
9154

9155
    This checks that we have the given tag.
9156

9157
    """
9158
    TagsLU.CheckPrereq(self)
9159
    for tag in self.op.tags:
9160
      objects.TaggableObject.ValidateTag(tag)
9161
    del_tags = frozenset(self.op.tags)
9162
    cur_tags = self.target.GetTags()
9163
    if not del_tags <= cur_tags:
9164
      diff_tags = del_tags - cur_tags
9165
      diff_names = ["'%s'" % tag for tag in diff_tags]
9166
      diff_names.sort()
9167
      raise errors.OpPrereqError("Tag(s) %s not found" %
9168
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9169

    
9170
  def Exec(self, feedback_fn):
9171
    """Remove the tag from the object.
9172

9173
    """
9174
    for tag in self.op.tags:
9175
      self.target.RemoveTag(tag)
9176
    self.cfg.Update(self.target, feedback_fn)
9177

    
9178

    
9179
class LUTestDelay(NoHooksLU):
9180
  """Sleep for a specified amount of time.
9181

9182
  This LU sleeps on the master and/or nodes for a specified amount of
9183
  time.
9184

9185
  """
9186
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9187
  REQ_BGL = False
9188

    
9189
  def ExpandNames(self):
9190
    """Expand names and set required locks.
9191

9192
    This expands the node list, if any.
9193

9194
    """
9195
    self.needed_locks = {}
9196
    if self.op.on_nodes:
9197
      # _GetWantedNodes can be used here, but is not always appropriate to use
9198
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9199
      # more information.
9200
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9201
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9202

    
9203
  def CheckPrereq(self):
9204
    """Check prerequisites.
9205

9206
    """
9207

    
9208
  def Exec(self, feedback_fn):
9209
    """Do the actual sleep.
9210

9211
    """
9212
    if self.op.on_master:
9213
      if not utils.TestDelay(self.op.duration):
9214
        raise errors.OpExecError("Error during master delay test")
9215
    if self.op.on_nodes:
9216
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9217
      for node, node_result in result.items():
9218
        node_result.Raise("Failure during rpc call to node %s" % node)
9219

    
9220

    
9221
class IAllocator(object):
9222
  """IAllocator framework.
9223

9224
  An IAllocator instance has four sets of attributes:
9225
    - cfg that is needed to query the cluster
9226
    - input data (all members of the _KEYS class attribute are required)
9227
    - four buffer attributes (in|out_data|text), that represent the
9228
      input (to the external script) in text and data structure format,
9229
      and the output from it, again in two formats
9230
    - the result variables from the script (success, info, nodes) for
9231
      easy usage
9232

9233
  """
9234
  # pylint: disable-msg=R0902
9235
  # lots of instance attributes
9236
  _ALLO_KEYS = [
9237
    "name", "mem_size", "disks", "disk_template",
9238
    "os", "tags", "nics", "vcpus", "hypervisor",
9239
    ]
9240
  _RELO_KEYS = [
9241
    "name", "relocate_from",
9242
    ]
9243
  _EVAC_KEYS = [
9244
    "evac_nodes",
9245
    ]
9246

    
9247
  def __init__(self, cfg, rpc, mode, **kwargs):
9248
    self.cfg = cfg
9249
    self.rpc = rpc
9250
    # init buffer variables
9251
    self.in_text = self.out_text = self.in_data = self.out_data = None
9252
    # init all input fields so that pylint is happy
9253
    self.mode = mode
9254
    self.mem_size = self.disks = self.disk_template = None
9255
    self.os = self.tags = self.nics = self.vcpus = None
9256
    self.hypervisor = None
9257
    self.relocate_from = None
9258
    self.name = None
9259
    self.evac_nodes = None
9260
    # computed fields
9261
    self.required_nodes = None
9262
    # init result fields
9263
    self.success = self.info = self.result = None
9264
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9265
      keyset = self._ALLO_KEYS
9266
      fn = self._AddNewInstance
9267
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9268
      keyset = self._RELO_KEYS
9269
      fn = self._AddRelocateInstance
9270
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9271
      keyset = self._EVAC_KEYS
9272
      fn = self._AddEvacuateNodes
9273
    else:
9274
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9275
                                   " IAllocator" % self.mode)
9276
    for key in kwargs:
9277
      if key not in keyset:
9278
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9279
                                     " IAllocator" % key)
9280
      setattr(self, key, kwargs[key])
9281

    
9282
    for key in keyset:
9283
      if key not in kwargs:
9284
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9285
                                     " IAllocator" % key)
9286
    self._BuildInputData(fn)
9287

    
9288
  def _ComputeClusterData(self):
9289
    """Compute the generic allocator input data.
9290

9291
    This is the data that is independent of the actual operation.
9292

9293
    """
9294
    cfg = self.cfg
9295
    cluster_info = cfg.GetClusterInfo()
9296
    # cluster data
9297
    data = {
9298
      "version": constants.IALLOCATOR_VERSION,
9299
      "cluster_name": cfg.GetClusterName(),
9300
      "cluster_tags": list(cluster_info.GetTags()),
9301
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9302
      # we don't have job IDs
9303
      }
9304
    iinfo = cfg.GetAllInstancesInfo().values()
9305
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9306

    
9307
    # node data
9308
    node_results = {}
9309
    node_list = cfg.GetNodeList()
9310

    
9311
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9312
      hypervisor_name = self.hypervisor
9313
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9314
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9315
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9316
      hypervisor_name = cluster_info.enabled_hypervisors[0]
9317

    
9318
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9319
                                        hypervisor_name)
9320
    node_iinfo = \
9321
      self.rpc.call_all_instances_info(node_list,
9322
                                       cluster_info.enabled_hypervisors)
9323
    for nname, nresult in node_data.items():
9324
      # first fill in static (config-based) values
9325
      ninfo = cfg.GetNodeInfo(nname)
9326
      pnr = {
9327
        "tags": list(ninfo.GetTags()),
9328
        "primary_ip": ninfo.primary_ip,
9329
        "secondary_ip": ninfo.secondary_ip,
9330
        "offline": ninfo.offline,
9331
        "drained": ninfo.drained,
9332
        "master_candidate": ninfo.master_candidate,
9333
        }
9334

    
9335
      if not (ninfo.offline or ninfo.drained):
9336
        nresult.Raise("Can't get data for node %s" % nname)
9337
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9338
                                nname)
9339
        remote_info = nresult.payload
9340

    
9341
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
9342
                     'vg_size', 'vg_free', 'cpu_total']:
9343
          if attr not in remote_info:
9344
            raise errors.OpExecError("Node '%s' didn't return attribute"
9345
                                     " '%s'" % (nname, attr))
9346
          if not isinstance(remote_info[attr], int):
9347
            raise errors.OpExecError("Node '%s' returned invalid value"
9348
                                     " for '%s': %s" %
9349
                                     (nname, attr, remote_info[attr]))
9350
        # compute memory used by primary instances
9351
        i_p_mem = i_p_up_mem = 0
9352
        for iinfo, beinfo in i_list:
9353
          if iinfo.primary_node == nname:
9354
            i_p_mem += beinfo[constants.BE_MEMORY]
9355
            if iinfo.name not in node_iinfo[nname].payload:
9356
              i_used_mem = 0
9357
            else:
9358
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9359
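            # Charge the node for the memory the instance is entitled to but
            # not currently using, so free memory is computed as if every
            # instance consumed its full configured amount.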
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9360
            remote_info['memory_free'] -= max(0, i_mem_diff)
9361

    
9362
            if iinfo.admin_up:
9363
              i_p_up_mem += beinfo[constants.BE_MEMORY]
9364

    
9365
        # compute memory used by instances
9366
        pnr_dyn = {
9367
          "total_memory": remote_info['memory_total'],
9368
          "reserved_memory": remote_info['memory_dom0'],
9369
          "free_memory": remote_info['memory_free'],
9370
          "total_disk": remote_info['vg_size'],
9371
          "free_disk": remote_info['vg_free'],
9372
          "total_cpus": remote_info['cpu_total'],
9373
          "i_pri_memory": i_p_mem,
9374
          "i_pri_up_memory": i_p_up_mem,
9375
          }
9376
        pnr.update(pnr_dyn)
9377

    
9378
      node_results[nname] = pnr
9379
    data["nodes"] = node_results
9380

    
9381
    # instance data
9382
    instance_data = {}
9383
    for iinfo, beinfo in i_list:
9384
      nic_data = []
9385
      for nic in iinfo.nics:
9386
        filled_params = objects.FillDict(
9387
            cluster_info.nicparams[constants.PP_DEFAULT],
9388
            nic.nicparams)
9389
        nic_dict = {"mac": nic.mac,
9390
                    "ip": nic.ip,
9391
                    "mode": filled_params[constants.NIC_MODE],
9392
                    "link": filled_params[constants.NIC_LINK],
9393
                   }
9394
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9395
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9396
        nic_data.append(nic_dict)
9397
      pir = {
9398
        "tags": list(iinfo.GetTags()),
9399
        "admin_up": iinfo.admin_up,
9400
        "vcpus": beinfo[constants.BE_VCPUS],
9401
        "memory": beinfo[constants.BE_MEMORY],
9402
        "os": iinfo.os,
9403
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9404
        "nics": nic_data,
9405
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9406
        "disk_template": iinfo.disk_template,
9407
        "hypervisor": iinfo.hypervisor,
9408
        }
9409
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9410
                                                 pir["disks"])
9411
      instance_data[iinfo.name] = pir
9412

    
9413
    data["instances"] = instance_data
9414

    
9415
    self.in_data = data
9416

    
9417
  def _AddNewInstance(self):
9418
    """Add new instance data to allocator structure.
9419

9420
    This in combination with _ComputeClusterData will create the
9421
    correct structure needed as input for the allocator.
9422

9423
    The checks for the completeness of the opcode must have already been
9424
    done.
9425

9426
    """
9427
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9428

    
9429
    if self.disk_template in constants.DTS_NET_MIRROR:
9430
      self.required_nodes = 2
9431
    else:
9432
      self.required_nodes = 1
9433
    request = {
9434
      "name": self.name,
9435
      "disk_template": self.disk_template,
9436
      "tags": self.tags,
9437
      "os": self.os,
9438
      "vcpus": self.vcpus,
9439
      "memory": self.mem_size,
9440
      "disks": self.disks,
9441
      "disk_space_total": disk_space,
9442
      "nics": self.nics,
9443
      "required_nodes": self.required_nodes,
9444
      }
9445
    return request
9446

    
9447
  def _AddRelocateInstance(self):
9448
    """Add relocate instance data to allocator structure.
9449

9450
    This in combination with _ComputeClusterData will create the
9451
    correct structure needed as input for the allocator.
9452

9453
    The checks for the completeness of the opcode must have already been
9454
    done.
9455

9456
    """
9457
    instance = self.cfg.GetInstanceInfo(self.name)
9458
    if instance is None:
9459
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
9460
                                   " IAllocator" % self.name)
9461

    
9462
    if instance.disk_template not in constants.DTS_NET_MIRROR:
9463
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9464
                                 errors.ECODE_INVAL)
9465

    
9466
    if len(instance.secondary_nodes) != 1:
9467
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
9468
                                 errors.ECODE_STATE)
9469

    
9470
    self.required_nodes = 1
9471
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
9472
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9473

    
9474
    request = {
9475
      "name": self.name,
9476
      "disk_space_total": disk_space,
9477
      "required_nodes": self.required_nodes,
9478
      "relocate_from": self.relocate_from,
9479
      }
9480
    return request
9481

    
9482
  def _AddEvacuateNodes(self):
9483
    """Add evacuate nodes data to allocator structure.
9484

9485
    """
9486
    request = {
9487
      "evac_nodes": self.evac_nodes
9488
      }
9489
    return request
9490

    
9491
  def _BuildInputData(self, fn):
9492
    """Build input data structures.
9493

9494
    """
9495
    self._ComputeClusterData()
9496

    
9497
    request = fn()
9498
    request["type"] = self.mode
9499
    self.in_data["request"] = request
9500

    
9501
    self.in_text = serializer.Dump(self.in_data)
9502

    
9503
  def Run(self, name, validate=True, call_fn=None):
9504
    """Run an instance allocator and return the results.
9505

9506
    """
9507
    if call_fn is None:
9508
      call_fn = self.rpc.call_iallocator_runner
9509

    
9510
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9511
    result.Raise("Failure while running the iallocator script")
9512

    
9513
    self.out_text = result.payload
9514
    if validate:
9515
      self._ValidateResult()
9516

    
9517
  def _ValidateResult(self):
9518
    """Process the allocator results.
9519

9520
    This will process and if successful save the result in
9521
    self.out_data and the other parameters.
9522

9523
    """
9524
    try:
9525
      rdict = serializer.Load(self.out_text)
9526
    except Exception, err:
9527
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9528

    
9529
    if not isinstance(rdict, dict):
9530
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
9531

    
9532
    # TODO: remove backwards compatibility in later versions
9533
    if "nodes" in rdict and "result" not in rdict:
9534
      rdict["result"] = rdict["nodes"]
9535
      del rdict["nodes"]
9536

    
9537
    for key in "success", "info", "result":
9538
      if key not in rdict:
9539
        raise errors.OpExecError("Can't parse iallocator results:"
9540
                                 " missing key '%s'" % key)
9541
      setattr(self, key, rdict[key])
9542

    
9543
    if not isinstance(rdict["result"], list):
9544
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9545
                               " is not a list")
9546
    self.out_data = rdict
9547

    
9548

    
9549
class LUTestAllocator(NoHooksLU):
9550
  """Run allocator tests.
9551

9552
  This LU runs the allocator tests
9553

9554
  """
9555
  _OP_REQP = ["direction", "mode", "name"]
9556

    
9557
  def CheckPrereq(self):
9558
    """Check prerequisites.
9559

9560
    This checks the opcode parameters depending on the direction and mode test.
9561

9562
    """
9563
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9564
      for attr in ["name", "mem_size", "disks", "disk_template",
9565
                   "os", "tags", "nics", "vcpus"]:
9566
        if not hasattr(self.op, attr):
9567
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9568
                                     attr, errors.ECODE_INVAL)
9569
      iname = self.cfg.ExpandInstanceName(self.op.name)
9570
      if iname is not None:
9571
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9572
                                   iname, errors.ECODE_EXISTS)
9573
      if not isinstance(self.op.nics, list):
9574
        raise errors.OpPrereqError("Invalid parameter 'nics'",
9575
                                   errors.ECODE_INVAL)
9576
      for row in self.op.nics:
9577
        if (not isinstance(row, dict) or
9578
            "mac" not in row or
9579
            "ip" not in row or
9580
            "bridge" not in row):
9581
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
9582
                                     " parameter", errors.ECODE_INVAL)
9583
      if not isinstance(self.op.disks, list):
9584
        raise errors.OpPrereqError("Invalid parameter 'disks'",
9585
                                   errors.ECODE_INVAL)
9586
      for row in self.op.disks:
9587
        if (not isinstance(row, dict) or
9588
            "size" not in row or
9589
            not isinstance(row["size"], int) or
9590
            "mode" not in row or
9591
            row["mode"] not in ['r', 'w']):
9592
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
9593
                                     " parameter", errors.ECODE_INVAL)
9594
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9595
        self.op.hypervisor = self.cfg.GetHypervisorType()
9596
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9597
      if not hasattr(self.op, "name"):
9598
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9599
                                   errors.ECODE_INVAL)
9600
      fname = _ExpandInstanceName(self.cfg, self.op.name)
9601
      self.op.name = fname
9602
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9603
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9604
      if not hasattr(self.op, "evac_nodes"):
9605
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9606
                                   " opcode input", errors.ECODE_INVAL)
9607
    else:
9608
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9609
                                 self.op.mode, errors.ECODE_INVAL)
9610

    
9611
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9612
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
9613
        raise errors.OpPrereqError("Missing allocator name",
9614
                                   errors.ECODE_INVAL)
9615
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9616
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
9617
                                 self.op.direction, errors.ECODE_INVAL)
9618

    
9619
  def Exec(self, feedback_fn):
9620
    """Run the allocator test.
9621

9622
    """
9623
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9624
      ial = IAllocator(self.cfg, self.rpc,
9625
                       mode=self.op.mode,
9626
                       name=self.op.name,
9627
                       mem_size=self.op.mem_size,
9628
                       disks=self.op.disks,
9629
                       disk_template=self.op.disk_template,
9630
                       os=self.op.os,
9631
                       tags=self.op.tags,
9632
                       nics=self.op.nics,
9633
                       vcpus=self.op.vcpus,
9634
                       hypervisor=self.op.hypervisor,
9635
                       )
9636
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9637
      ial = IAllocator(self.cfg, self.rpc,
9638
                       mode=self.op.mode,
9639
                       name=self.op.name,
9640
                       relocate_from=list(self.relocate_from),
9641
                       )
9642
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9643
      ial = IAllocator(self.cfg, self.rpc,
9644
                       mode=self.op.mode,
9645
                       evac_nodes=self.op.evac_nodes)
9646
    else:
9647
      raise errors.ProgrammerError("Uncatched mode %s in"
9648
                                   " LUTestAllocator.Exec", self.op.mode)
9649

    
9650
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
9651
      result = ial.in_text
9652
    else:
9653
      ial.Run(self.op.allocator, validate=False)
9654
      result = ial.out_text
9655
    return result